| author | Edward Betts <edwardbetts@gmail.com> |
| Fri Nov 02 15:45:41 2007 +0000 (9 months ago) | |
| changeset 228 | 2c01f1a1b2b8 |
| parent 175 | e1bcd3c22a83 |
| permissions | -rw-r--r-- |
1 # a python representation of the Open Library schema
2 # (run this to produce an html representation, or invoke
3 # it with the arguments "template edition" to print out
4 # template markup for an edition item.)
5
6 # Following is a python datastructure representing the field-schema for
7 # bibliographic items in ThingDB. Where the `count` attribute is not
8 # specified, its value is `'single'`. The types `string`, `text`, `url` (and
9 # perhaps `date`) may all be stored as "strings" in ThingDB, but the
10 # distinction here may help to render those strings appropriately in the UI.
11
# Ordered field schema, keyed by item type name ('author', 'edition').
# Each type maps to a list of (field_name, spec) pairs; the list keeps the
# fields in display order.  Keys used in a spec dict:
#   'type'        -- required; one of the type names described above
#   'count'       -- 'multiple' for repeatable fields; absent means 'single'
#   'title'       -- optional human-readable label (falls back to the name)
#   'marc_fields' -- optional; either a single string or a list of strings
#   'example'     -- optional sample value
#   'description' -- optional explanatory text
# NOTE(review): the field name 'exerpts' is misspelled, but it is an
# identifier that consumers presumably depend on -- confirm before renaming.
schema_ordered = {

    'author': [
        ('identifier', {
            'type': 'string',
            'count': 'multiple',
            # 'marc_fields': ['100:abcd', '110:ab', '710:ab', '111:acdn', '711:acdn'],
            'example': "Twain, Mark, 1835-1910",
            'description': "unique id in some catalog",
        }),
        ('name', {
            'type': 'string',
            'example': "Mark Twain",
            'description': "human-readable name",
        }),
        ('birth_date', {'type': 'date', 'example': "1835"}),
        ('death_date', {'type': 'date', 'example': "1910"}),
        ('bio', {'type': 'text'}),
    ],

    'edition': [
        ('source_record_loc', {
            'title': "Source Record Locator",
            'type': 'string',
            'count': 'multiple',
            'example': "marc_records_scriblio_net/part01.dat:29834:543",
            'description': "a locator for the source record data",
        }),
        ('source_record_id', {
            'title': "Source Record Id",
            'type': 'string',
            'count': 'multiple',
            'example': "LC:DLC:00000006",
            'description': "a record identifier that is globally unique and that also can be constructed consistently from the contents of a record and an identifier for its source catalog",
        }),
        ('author_identifier', {
            'title': "Author Identifier",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['100:abcd author_id', '110:ab author_id', '111:acdn author_id'],
            'example': "Twain, Mark, 1835-1910",
            'description': "unique author id in some catalog",
        }),
        # ('authors', { 'type': 'id-ref', 'count': 'multiple', 'example': 'a/Mark_Twain' }),
        ('contributions', {
            'title': "Contributions",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['700:abcde', '710:ab', '711:acdn'],
            'example': 'Illustrated by: Steve Bjorkman',
        }),
        ('title', {
            'title': "Title",
            'type': 'string',
            'marc_fields': '245:ab clean_name',
            'example': 'The adventures of Tom Sawyer',
        }),
        ('subtitle', {
            'title': "Subtitle",
            'type': 'string',
            'count': 'multiple',
        }),
        ('by_statement', {
            'title': "By Statement",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': '245:c',
            'example': 'Herman Melville ; [illustrated by Barry Moser]',
        }),
        ('sort_title', {
            'title': "Title for sorting",
            'type': 'string',
            'example': 'adventures of Tom Sawyer',
        }),
        ('other_titles', {
            'title': "Other Titles",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['246:a', '730:a-z', '740:apn'],
            'example': "Mark Twain's The Adventures of Tom Sawyer",
        }),
        ('work_title', {
            'title': "Work Title",
            'type': 'string',
            'marc_fields': ['240:amnpr', '130:a-z'],
            'description': "The 240 \"work title\" is used in the OCLC FRBR algorithm. The 130 is also used, and there should be either a 130 or a 240 in a record, but not both. It would be ideal if we could pick up either for the work title.",
        }),
        ('edition', {
            'title': "Edition",
            'type': 'string',
            'marc_fields': '250:ab',
            'example': '2nd. edition',
            'description': 'information about this edition',
        }),
        ('publisher', {
            'title': "Publisher",
            'type': 'string',
            'marc_fields': '260:b clean_name',
            'example': 'W. W. Norton & Co.',
        }),
        ('publish_place', {
            'title': "Publish Place",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': '260:a clean',
            'example': 'New York',
        }),
        ('publish_date', {
            'title': "Publish Date",
            'type': 'date',
            'marc_fields': '008:7-10',
            'example': '2006',
        }),
        ('pagination', {
            'title': "Pagination",
            'type': 'string',
            'marc_fields': '300:a',
            'example': "viii, 383 p. :",
            'description': "full pagination information",
        }),
        ('number_of_pages', {
            'title': "Number of Pages",
            'type': 'int',
            'example': '237',
            'marc_fields': '300:a biggest_decimal',
            'description': 'largest decimal found',
        }),
        ('subjects', {
            'title': "Subjects",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': [
                '600:abcd--x--v--y--z',
                '610:ab--x--v--y--z',
                '630:acdegnpqst--v--x--y--z',
                '650:a--x--v--y--z',
                '651:a--x--v--y--z',
            ],
            'example': 'Runaway children -- Fiction',
        }),
        ('subject_place', {
            'title': "Subject Places",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['651:a*', '650:z*'],
            'example': "Venice (Italy)",
        }),
        ('subject_time', {
            'title': "Subject Times",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['600:y*', '650:y*', '651:y*'],
            'example': '20th century',
        }),
        ('genre', {
            'title': "Genre",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['600:v*', '650:v*', '651:v*'],
            'example': "Biography",
        }),
        ('series', {
            'title': "Series Information",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['440:av', '490:av', '830:av'],
            'example': "Oxford world's classics",
        }),
        ('language', {
            'title': "Language",
            'type': 'string',
            'marc_fields': '"ISO:" 008:35-37 +',
            'example': 'ISO:tel',
            'description': "coded or human-readable description of the text's language",
        }),
        ('physical_format', {
            'title': "Physical Format",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': '245:h',
        }),
        ('notes', {
            'title': "Notes",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': '500-599!505!520:a-z',
        }),
        ('description', {
            'title': "Description",
            'type': 'text',
            'marc_fields': '520:a',
        }),
        ('exerpts', {'type': 'text', 'count': 'multiple'}),
        ('table_of_contents', {
            'title': "Table of Contents",
            'type': 'text',
            'count': 'multiple',
            'marc_fields': '505:art',
        }),
        ('cover_image', {'type': 'url'}),
        ('scan_contributor', {'type': 'string'}),
        ('scan_sponsor', {'type': 'string'}),
        ('dewey_number', {
            'title': "Dewey Decimal Classification",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': '082:a',
            'example': '914.3',
        }),
        ('LC_classification', {
            'title': "Library of Congress Classification",
            'type': 'string',
            'count': 'multiple',
            'marc_fields': '050:ab',
            'example': 'BJ1533.C4 L49',
        }),
        ('ISBN', {
            'type': 'string',
            'count': 'multiple',
            'marc_fields': ['020:a normalize_isbn', '024:a normalize_isbn'],
            'example': '9780393926033',
            'description': 'a 13-digit ISBN',
        }),
        ('UCC_13', {'type': 'string'}),
        ('UPC', {'type': 'string'}),
        ('ISMN', {'type': 'string'}),
        ('DOI', {'type': 'string'}),
        ('LCCN', {
            'type': 'string',
            'marc_fields': '010:a normalize_lccn',
            'example': "2006285320",
        }),
        ('GTIN_14', {'type': 'string'}),
        ('oca_identifier', {
            'type': 'string',
            'example': 'albertgallatinja00stevrich',
        }),
    ],
}
218
# Unordered view of schema_ordered: schema[typename][field_name] -> spec.
# The spec dicts are shared with schema_ordered, not copied.
# items() is used instead of the Python 2-only iteritems() so that the
# module can be imported on both Python 2 and Python 3.
schema = {}
for typename, ordered_fields in schema_ordered.items():
    schema[typename] = dict(ordered_fields)
225
def print_html(ordered_schema=None):
    """Print one HTML table per item type, listing every field of that type.

    Each row shows the field name, its type (suffixed with "*" when the
    field's count is "multiple"), its MARC fields, and its example and
    description when present.  All schema values are HTML-escaped before
    being written, so text such as "W. W. Norton & Co." yields valid markup.

    ordered_schema -- optional mapping of type name to a list of
                      (field_name, spec) pairs; defaults to the module-level
                      schema_ordered.
    """
    if ordered_schema is None:
        ordered_schema = schema_ordered

    def escape(text):
        # '&' has to be replaced first, otherwise the entities produced for
        # '<' and '>' would themselves be escaped again.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    # print(...) with a single argument behaves the same on Python 2 and 3.
    for typename, fields in ordered_schema.items():
        print("<p><b>" + escape(typename) + "</b></p>")
        print("<table border=\"1\"><tbody>")
        print("<tr><th>Field</th><th>Type</th><th>MARC Fields</th><th>Example (Description)</th></tr>")
        for fname, fspec in fields:
            # 'marc_fields' may be a single string or a list of strings.
            marc_fields = fspec.get('marc_fields', [])
            if not isinstance(marc_fields, list):
                marc_fields = [marc_fields]

            type_cell = escape(fspec['type'])
            if fspec.get('count', "single") == "multiple":
                type_cell += "*"

            notes_cell = ''
            example = fspec.get('example')
            if example:
                notes_cell += '"' + escape(example) + '"'
            description = fspec.get('description')
            if description:
                notes_cell += " <i>(" + escape(description) + ")</i>"

            print("<tr>")
            print("<td><b>" + escape(fname) + "</b></td>")
            print("<td>" + type_cell + "</td>")
            print("<td>" + ", ".join([escape(field) for field in marc_fields]) + "</td>")
            print("<td>" + notes_cell + "</td>")
            print("</tr>")
        print("</tbody></table>")
242
def print_template(typename, ordered_schema=None):
    """Print template markup that displays every field of one item type.

    For each field a "$if page.d.get(...)" guard is emitted, followed by a
    <dt> holding the field title (with the description as its tooltip) and
    either a "$for" loop of <dd> entries for fields whose count is
    "multiple", or a single <dd> otherwise.

    The description is escaped before being placed in the title="..."
    attribute; without that, a description containing double quotes (as the
    'work_title' one does) would terminate the attribute early.

    typename       -- key of the item type to render, e.g. "edition".
    ordered_schema -- optional mapping of type name to a list of
                      (field_name, spec) pairs; defaults to the module-level
                      schema_ordered.

    Raises KeyError if typename is not present in the schema.
    """
    if ordered_schema is None:
        ordered_schema = schema_ordered

    def escape(text, quote=False):
        # '&' has to be replaced first so already-produced entities are not
        # escaped a second time.
        text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        if quote:
            text = text.replace('"', "&quot;")
        return text

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("<dl class=\"metadata\">")
    for fname, fspec in ordered_schema[typename]:
        title = escape(fspec.get('title', fname))
        description = escape(fspec.get('description', ""), quote=True)
        multiple = fspec.get('count', "single") == "multiple"

        print("$if page.d.get('%s'):" % fname)
        print("\t<dt title=\"%s\"><b>%s</b></dt>" % (description, title))
        if multiple:
            print("\t$for v in page.d.get('%s'): <dd>$v</dd>" % fname)
        else:
            print("\t<dd>$page.%s</dd>" % fname)
    print("</dl>")
257
# Command-line entry point:
#   (no arguments)       print the HTML representation of the whole schema
#   template TYPENAME    print template markup for one item type
# NOTE(review): the else branch is assumed to pair with the argument-count
# test, so any argument count other than two prints the HTML -- confirm.
if __name__ == "__main__":
    import sys

    if len(sys.argv) == 3:
        command, typename = sys.argv[1:]
        if command != "template":
            # Previously an unrecognised command produced no output at all.
            sys.stderr.write("usage: %s [template TYPENAME]\n" % sys.argv[0])
            sys.exit(2)
        if typename not in schema_ordered:
            # Report the valid names instead of failing with a KeyError.
            sys.stderr.write("unknown type %r; expected one of: %s\n"
                             % (typename, ", ".join(sorted(schema_ordered))))
            sys.exit(2)
        print_template(typename)
    else:
        print_html()
266