comparison DESpecs/DESpecs_0_3.tex @ 1:e295e4c94593

version 2008-09-23
author Wolfgang Schmidle <wschmidle@mpiwg-berlin.mpg.de>
date Fri, 26 Nov 2010 18:42:09 +0100
parents DESpecs/DESpecs_0_2.tex@106aa1285f04
children
comparison
equal deleted inserted replaced
0:106aa1285f04 1:e295e4c94593
1 %!TEX TS-program = xelatex
2 %!TEX encoding = UTF-8 Unicode
3 %!TEX root = ../DESpecs.tex
4
5 \section{General}
6
7 Type the entire content of one page, then go on to the next page. Do not mix the content of two pages.
8
9 \section{Structural Markup}
10
11 \subsection{Pages, page numbers, running heads}
12
13 \begin{mainrule}
14 Page breaks are marked by <pb>. If the page has a page number, it is written within the <pb> tag. If there is a running head on the page, it is marked by <rh> and </rh>. Type it immediately after the <pb> tag.
15 \end{mainrule}
16
17 \begin{clarification}
18 The position of the page number on the page will not be encoded.
19 A blank line may be inserted before the <pb> tag.
20 Type the <pb> and <rh> tags before you type any content of the page.
21 Do not type spaces within words.
22 If there is a horizontal line below the running head, do not type it.
23 \end{clarification}
24
25 \begin{example}
26
27 \includegraphics[scale=0.33]{pagenumber_runninghead}
28
29 \begin{typeLatin}
30 <pb 2><rh>GEOMET. ELEMENT. EVCLIDIS</rh> \\
31 sunt ӕquales. 16 Et hic quidem punctus, centrum circuli dicitur. \\
32 ...
33 \end{typeLatin}
34
35 \end{example}
36
37 \subsection{Headings}
38
39 \begin{mainrule}
40 Headings are marked by <h> and </h>.
41 \end{mainrule}
42
43 \begin{clarification}
44 All headings are tagged in the same way, regardless of the font size. ($\to$ maybe different rules for specific books). Do not type spaces within words.
45 \end{clarification}
46
47 \begin{example}
48
49 \includegraphics[scale=0.3]{headings}
50
51 \begin{typeLatin}
52 <h>EUCLIDIS \\
53 ELEMENTORUM \\
54 LIBER DECIMUS.</h>
55 \end{typeLatin}
56
57 or alternatively, if you are unsure whether each line is a separate heading:
58
59 \begin{typeLatin}
60 <h>EUCLIDIS</h> \\
61 <h>ELEMENTORUM</h> \\
62 <h>LIBER DECIMUS.</h>
63 \end{typeLatin}
64
65 \end{example}
66
67
68 \subsection{Paragraphs}
69
70 \begin{mainrule}
71 Paragraphs are marked by <p> and </p>.
72 \end{mainrule}
73
74 \begin{clarification}
75 If the first line of the paragraph is indented, this will not be encoded.
76 \end{clarification}
77
78 \begin{example}
79
80 \includegraphics[scale=0.4]{paragraph}
81
82
83 See also the example in section \ref{Structural markup general example}.
84 \end{example}
85
86 \subsection{Columns}
87
88 \begin{mainrule}
89 Columns are marked by <col> and </col>. The columns on a page are numbered from left to right.
90 \end{mainrule}
91
92 \begin{example}
93
94 \includegraphics[scale=0.6]{three_columns}
95
96 \begin{typeLatin}
97 <col 1>This is one \\
98 column ...</col> \\ \\
99 <col 2>This is \\
100 another \\
101 column.</col> \\ \\
102 <col 3>And there \\
103 might be \\
104 yet another \\
105 column.</col> \\
106 \end{typeLatin}
107
108 \end{example}
109
110
111 %\subsection{Blank lines}
112 %Do not insert blank lines. (Or: You can insert blank lines? An explicit "do not insert" rule will probably be ignored anyway?)
113
114 \subsection{Marginal notes}
115
116 \begin{mainrule}
117 A marginal note is marked by <mg> and </mg>. It should be typed in separate lines, after the line it is the closest to.
118 \end{mainrule}
119
120 \begin{clarification}
121 Indicate the position of the note (left or right): <mg l> or <mg r>. Do not mark paragraphs within a marginal note, i.e. do not use the <p> tag.
122 \end{clarification}
123
124 %(The post-processing effort of adjusting marginal notes is small compared to the effort of typing them, so it does not make sense to make this rule too precise or to have complicated rule variations for marginal notes to the left and to the right of the main text. Or does it make sense to say something like: a blank line before/after the marginal note indicates that the note is to the right/left of the main text? Or <marginal left>, <marginal right> ? But again: How are the results of two typists compared? Do they know that having the marginal note one line above or below doesn't matter?)
125
126 \begin{example} (image left marginal note, image right marginal note)
127
128 \includegraphics[scale=0.9]{bsp_marginalnote_coimbricenses_232}
129
130 \begin{typeLatin}
131 main text \\
132 <marginal>marginal text\\
133 marginal text \\
134 marginal text</marginal> \\
135 main text
136 \end{typeLatin}
137
138 \end{example}
139
140 % Would <m> be better than <marginal>? Do they speak English or is an English word just longer to type for them?
141
142 \begin{exception}
143
144 \includegraphics[scale=0.35]{bsp_handmargin2_benedetti_174}
145
146 Do \emph{not} type handwritten notes.
147 \end{exception}
148
149
150 \subsection{Footnotes}
151
152 \begin{mainrule}
153 Footnotes are marked by <fn> and </fn>. Type the footnote where it appears on the page. Insert the footnote symbol in the <fn> tag. The corresponding footnote symbol in the main text is marked by <n>. Insert the footnote symbol in the <n> tag.
154 \end{mainrule}
155
156 \begin{clarification}
157 Ignore the superscript of the footnote symbol. Use <n> in the main text only if you can identify the symbol as footnote symbol and if there is a corresponding <fn> tag somewhere. Do not mark paragraphs within a footnote, i.e. do not use the <p> tag. Do not type horizontal lines.
158 \end{clarification}
159
160 \begin{example}
161 ...
162 \end{example}
163
164 %\begin{example}
165 %Example [image]: this word$^1$ has a footnote
166 %\begin{tabular}{l}
167 %this word \\
168 %<fn 1> footnote text \\
169 %footnote text</fn> \\
170 %has a footnote \\
171 %\end{tabular}
172 %Alternative:
173 %\begin{tabular}{l}
174 %this word 1 has a footnote \\
175 %<fn 1> footnote text \\
176 %footnote text</fn> \\
177 %\end{tabular}
178 %\end{example}
179 %Another alternative: Ignore the semantics of the footnote, write the footnote symbol in superscript in the main text, type in the footnote where it occurs on the text?
180 %Archimedes (note: <n> vs. <fn> !?)
181 %\begin{tabular}{l}
182 %this word...<n 1> has a footnote \\
183 %<fn 1> footnote text \\
184 %footnote text</fn> \\
185 %\end{tabular}
186
187 Anchored marginal notes
188
189 (what are common footnote symbols at that time? Do we need to provide a list with <01> replacements for weird symbols?)
190
191 \subsection{Quotations}
192
193 \begin{example}
194
195 \includegraphics[scale=0.35]{quotation}
196
197 \end{example}
198
199
200 \subsection{Figures}
201
202 (Follow the workflow for the mechanics project? <p>text<figure/></p> ??)
203
204 (Does it make sense to make them distinguish between sheer ornaments and figures? Would be easy to distinguish at the post-processing stage.)
205
206 Where the figure occurs, put a <figure> tag in a separate line.
207
208 \begin{example}
209
210 images: between paragraphs, between lines of one paragraph, left/middle/right position and the text is floating around it, between two columns (treat as to the right of the left column)?
211
212 \begin{typeLatin}
213 text</p> \\
214 <figure> \\
215 <p>text
216 \end{typeLatin}
217
218 \begin{typeLatin}
219 text \\
220 <figure> \\
221 text
222 \end{typeLatin}
223
224 And so on.
225 \end{example}
226
227
228 \subsection{Tables and lists}
229
230 \subsection{Catchwords and Signatures}
231
232 Catchwords and signatures at the bottom of the page should not be typed.
233
234 \begin{example}
235
236 \includegraphics[scale=0.35]{bsp_catchwordsignature_benedetti_191}
237
238 \end{example}
239
240
241 \subsection{Unrecognizable characters}
242
243 Obscure characters: Chinese firm should make list, e.g. <01> etc., use consistently.
244
245 Creases, etc.
246
247 General tag “there is a problem”, applicable to characters, words, paragraphs, or pages.
248
249 le*t*ter, le*tte*r, *letter*, <p *>, <page *> ?
250
251 (There is a considerable degree of decision-making involved. Can the versions of two typists still be compared?)
252
253
254 \section{Positional Markup}
255
256 %\subsection{}
257
258 <lang greek>, etc., for example with latin and greek columns? Or is it easily insertable during post-processing?
259
260 \subsection{Punctuation}
261
262 Regardless of the use of spaces in the book, there should be a blank after . , : ; ! ?, but not before.
263
264 (is it called space or blank? Blank to be confused with blank line?)
265
266 \qquad Word. Word, word: Word; word! Word?
267
268 Parentheses: Word (word word) word [word] word \{word\} word.
269
270 Archimedes: <sub> for subscript, <super> for superscript
271
272 \subsection{Hyphens}
273
274 \begin{mainrule}
275 %Line breaks will be typed as such, without additional tags.
276 If the line ends with a hyphen, type it.
277 \end{mainrule}
278
279 \begin{clarification}
280 Hyphens at the end of a line can have different shapes. Always type - regardless of its actual shape.
281 Do not insert a space at the end of the line.
282 \end{clarification}
283
284 % Do not insert a space at the end. (The idea is that without this rule the results of two different typists will invariably differ. On the other hand, this is easy to normalise in the post-processing stage. How does the Chinese firm decide if the results of two typists are the same? A third person compares it and either finds or does not find any differences? Then this rule would be superfluous and would in fact hinder the workflow.)
285 % (is singular "a word separation mark" more clear than plural?)
286
287 \begin{example}
288
289 \includegraphics[scale=0.35]{hyphen_normal}
290
291 \begin{typeLatin}
292 Oportet autem arcum in maiore circulo datum non e\$-
293 \end{typeLatin}
294
295 \includegraphics[scale=0.35]{hyphen_schraeg}
296
297 \begin{typeLatin}
298 rint ... cui \$u-
299 \end{typeLatin}
300
301 \includegraphics[scale=0.35]{hyphen_doppelt_schraeg}
302
303 \begin{typeLatin}
304 RENSIS CLARISSIMI PHILOSOPHI, MATHEMA-
305 \end{typeLatin}
306
307
308 %\begin{typeLatin}
309 %RENSIS CLARISSIMI PHILOSOPHI, MATHEMA- \\
310 %ticorum facilè principis, primùm ex Campano, deinde ex Theone Grӕco \\
311 %cõmentatore, interprete Bartholomӕo Zamberto Veneto,
312 %\end{typeLatin}
313
314 \end{example}
315
316 %(Is it asked too much to recognise word separation marks? But then, how should one type an “equality sign in inverse italics”?)
317
318
319 \subsection{Mathematical symbols and fractions}
320
321 \begin{example}
322
323 \includegraphics[scale=0.35]{fraction}
324
325 (this is one fraction, i.e. $\frac{1417203}{9999999}$ !)
326
327 \end{example}
328
329
330 \subsection{Latin Alphabet}
331
332
333 \subsubsection{Characters to be typed directly}
334
335 (from the ECHO data entry specifications)
336
337 ASCII characters should be used with their normal values, except as indicated below. Note that \emph{tilde} (by itself) should be entered directly as \textasciitilde.
338
339 The following characters with diacritics are to be typed directly:
340
341 \begin{tabular}{lll}
342 Characters with acute accent & á é í ó ú & Á É Í Ó Ú \\
343 Characters with grave accent & à è ì ò ù & À È Ì Ò Ù \\
344 Characters with circumflex accent & â ê î ô û & Â Ê Î Ô Û \\
345 Characters with umlaut/diaeresis & ä ë ï ö ü ÿ & Ä Ë Ï Ö Ü Ÿ \\
346 Characters with tilde & ã õ ñ & Ã Õ Ñ \\
347 Characters with cedilla & ç & Ç \\
348 Common ligatures & æ œ & Æ Œ \\
349 \end{tabular}
350
351 (Place this table more prominently? Larger font? Bold face?)
352
353 Owing to the high frequency of “long s” <∫ >, this character should be typed as \$.
354
355 long s as \$ or S or (s), as in e\$t, eSt, e(s)t, or directly (Mac German keyboard: ∫ is alt-b)? (version eSt: easy to type; problem with words in caps but this can be disambiguated automatically; problem with words that start with a capital S)
356
357 [Is there an easy way to type õ (simply tilde-o, just as on a German keyboard)? If not: õ as \textasciitilde o, or as (o)? This should work well if the typists are careful with the spaces before and after real brackets: e.g. quid(e) versus quid (e).]
358
359 [ӕ as (ae), etc.?]
360
361
362 (ECHO specifications, continued)
363
364 (Are these instructions intended for Chinese typists or for scholars? If the former, they should be changed or left out. For example, we should know whether their input method supports the composition of the accent with a certain character.)
365
366 XML Entity notation: Characters that cannot conveniently be typed may be indicated by means of XML entities. The entities specified in ISO 8879 (see especially isolat1 and isolat2) should be used. More generally, some characters may be entered using conventions specified below:
367
368 For characters with accents enumerated above, if the text input method does not support the composition of the accent with a certain character, entities may be used thus. For instance, the operating system typically makes no provision for allowing the entry of a modified <q> — yet such characters are frequent in Latin materials. They may be typed as (e.g.):
369
370 \qquad \&qacute; \&qgrave; \&quml; \&qtilde;
371
372 The ampersand (\&) must be entered as the entity \&amp; to avoid confusion with its use in entities.
373
374
375 \subsubsection{Italics}
376
377 Encode italics for words and whole lines with /.../. Example:
378
379 text /text/ text
380
381 /text text text/
382
383 \begin{tabular}{l}
384 /text text text/ \\
385 /text text text/ \\
386 /text text text/ \\
387 \end{tabular}
388
389 Paragraphs in italics: <p //>text</p>
390
391 <page //> ??
392
393 (Or will this end in disaster because <p //> and </p> look too similar? Alternative: <p italics>text</p>, <page italics>. General question: is it alright to use a symbol such as / if we already use it with a different meaning in a different context, i.e. in closing tags, even though the contexts cannot be mixed up easily?)
394
395 Problem of single non-italic characters within a paragraph in italics.
396
397 \subsubsection{Ligatures}
398
399 Resolve simple ligatures: fi, fl, ffi, ffl, st, ct [image: ct]
400
401 Examples:
402
403 \includegraphics[scale=0.2]{bsp_ligae_benedetti_13}
404 \includegraphics[scale=0.2]{bsp_ligct_benedetti_13}
405 \includegraphics[scale=0.2]{bsp_ligfi_benedetti_13}
406
407 \includegraphics[scale=0.2]{bsp_ligii_benedetti_13}
408 \includegraphics[scale=0.2]{bsp_ligsi_benedetti_13}
409 \includegraphics[scale=0.2]{bsp_ligss_benedetti_13}
410
411 \includegraphics[scale=0.2]{bsp_ligssi_benedetti_13}
412 \includegraphics[scale=0.2]{bsp_ligst_benedetti_13}
413 \includegraphics[scale=0.2]{bsp_ligshortst_benedetti_156}
414
415 Treat as normal character: \& (even better: do not mention \& in the specs)
416
417 * List of complex latin ligatures: Ligature, meaning (leave out?), encode as.
418
419 Encode as <01> (or <001> ?) etc. according to this list
420
421 \subsection{Greek Alphabet}
422
423
424
425 \subsubsection{Ligatures}
426
427 list: the alphabet, two different sigmas; stigma (sigma-tau ligature) will not be mentioned and typed as sigma
428
429 list: high frequency ligatures: ου, μεν, etc.
430
431 list: easy ligatures: γη, γω, δι, δο, δρ, ερ, ει, κο, λλ, μο, πα, πο, σκ, σι, στι (as σι ?), στο (as σο ?), τα, τι, το, υν, ψι, etc.
432
433 list: difficult and rare ligatures: [ευ], [μετα], [την], [των], etc. Some of them are, in fact, probably not that rare.
434
435 The \emph{exact} shape of the μετα ligature with the grave accent in the ligature is not listed in Faulmann 1880. Wallace 1923 has it, though. Wallace seems to be more focused than Faulmann, who wants to cover everything from antiquity until the 19th century.
436 %(Unfortunately, Wallace is completely handwritten.)
437 Neither one has the exact shape of the ευ ligature, so unless we provide specific lists for each book, there will always be some guessing involved. Ingram 1966, \emph{The Ligatures of Early Printed Greek}, has both, but deliberately leaves out simple ligatures such as γω and semi-simple ligatures such as ψι (see Ingram p.380) because he didn't think of Chinese typists. (Faulmann has ψι, but not γω.) In short, perhaps the easiest path for us would be to reproduce (a subset of) Ingram's list.
438
439 ($\to$ check the whole greek Euclid page: are all difficult ligatures contained in Ingram's list?)
440
441 Example (Euclid lat/gr, p.16):
442
443 %\begin{figure}[htb]
444 %\centering
445 \includegraphics[scale=0.3]{greek_text_with_ligatures}
446 %\caption{Sample text for Greek ligatures (Euclid lat/gr, p.16)}
447 %\label{picture greek ligatures}
448 %\end{figure}
449
450 %\ref{picture greek ligatures})
451 Transcription in four versions (with standardised β instead of ϐ, which is U+03D0): The first version is the easiest to read, which may be an advantage even with automated post-processing. Also, it makes it easier to encode the accents, see below.
452
453 % $\to$ The Times font in which this XeTeX document is set doesn't have stigma and the alternative beta, and the combination of spiritus asper/lenis and an accent looks weird on the screen (prints fine on the Konica printer). Use a different font? Times New Roman has stigma and alternative beta and could be used for these characters.
454
455 % $\to$ Greek characters in monospaced fonts: There are only a few monospaced fonts on the Mac, namely Courier, Courier New, Andale Mono, Monaco. Courier and Monaco don't have Greek characters. Andale Mono doesn't have complex characters such as alpha with accent and spiritus. Thus, only Courier New remains and can be seen below. Are we happy with it? The alternative would be a new monospaced font or a non-monospaced font such as Times/Times New Roman or Minion Pro.
456
457 \begin{typeGreek}
458 Πάππ(ου) τ(οῦ) Αλεξαν(δρ)έως Σ(υν)α(γω)(γῆ)ς \\
459 ἓβ(δο)(μο)ν. \\
460 Πε(ρι)έχ(ει) δὲ λήμμα(τα) τ(οῦ) ἀναλυο(μέν)(ου) (τό)(πο)υ. \\
461 Ο καλ(ού)(μεν)ος ἀναλυό(μεν)ος, Ερμόδωρε τέκνον, \\
462 κα(τὰ) σύ(λλ)η(ψι)ν ἰ(δί)α (τί)ς ἐ(στι)ν ὓλη (πα)ρε(σκ)[ευ]ασ(μέν)η, \\{}
463 [μετὰ] [τὴν] [τῶν] (κο)ινῶν (στο)ιχ(εί)ων (πο)ίη(σι)ν, (το)ῖς β(ου)λομένοις \\
464 \end{typeGreek}
465
466 (Last word: Either the alternative beta is not used at the beginning of the word or this is an alternative version of the capital beta; my guess is the former. Note that μέν is \emph{not} ligated here. The ου ligature is printed badly.)
467
468 \begin{typeGreek}
469 Πάππ<01> τ<01> Αλεξαν<07>έως Σ<23>α<04><03>ς \\
470 ἓβ<06><13>ν. \\
471 Πε<09>έχ<10> δὲ λήμμα<20> τ<01> ἀναλυο<02><01> <22><15>υ. \\
472 Ο καλ<01><02>ος ἀναλυό<02>ος, Ερμόδωρε τέκνον, \\
473 κα<20> σύ<12>η<24>ν ἰ<05>α <21>ς ἐ<18>ν ὓλη <14>ρε<16><25>ασ<02>η, \\
474 <26> <27> <28> <11>ινῶν <19>ιχ<10>ων <15>ίη<17>ν, <22>ῖς β<01>λομένοις \\
475 \end{typeGreek}
476
477 \begin{typeGreek}
478 Πάππ(01) τ(01) Αλεξαν(07)έως Σ(23)α(04)(03)ς \\
479 ἓβ(06)(13)ν. \\
480 Πε(09)έχ(10) δὲ λήμμα(20) τ(01) ἀναλυο(02)(01) (22)(15)υ. \\
481 Ο καλ(01)(02)ος ἀναλυό(02)ος, Ερμόδωρε τέκνον, \\
482 κα(20) σύ(12)η(24)ν ἰ(05)α (21)ς ἐ(18)ν ὓλη (14)ρε(16)(25)ασ(02)η, \\
483 (26) (27) (28) (11)ινῶν (19)ιχ(10)ων (15)ίη(17)ν, (22)ῖς β(01)λομένοις \\
484 \end{typeGreek}
485
486 \begin{typeGreek}
487 Πάππ01 τ01 Αλεξαν07έως Σ23α0403ς \\
488 ἓβ0613ν. \\
489 Πε09έχ10 δὲ λήμμα20 τ01 ἀναλυο0201 2215υ. \\
490 Ο καλ0102ος ἀναλυό02ος, Ερμόδωρε τέκνον, \\
491 κα20 σύ12η24ν ἰ05α 21ς ἐ18ν ὓλη 14ρε1625ασ02η, \\
492 26 27 28 11ινῶν 19ιχ10ων 15ίη17ν, 22ῖς β01λομένοις \\
493 \end{typeGreek}
494
495 The first version makes it easier to encode the accents. Alternative:
496
497 \begin{typeGreek}
498 Πάππ<01> τ<01\textasciitilde> Αλεξαν<07>έως Σ<23>α<04><03\textasciitilde>ς \\
499 ἓβ<06><13>ν. \\
500 Πε<09>έχ<10> δὲ λήμμα<20> τ<01\textasciitilde> ἀναλυο<02´><01> <22´><15>υ. \\
501 Ο καλ<01´><02>ος ἀναλυό<02>ος, Ερμόδωρε τέκνον, \\
502 κα<20`> σύ<12>η<24>ν ἰ<05´>α <21´>ς ἐ<18>ν ὓλη <14>ρε<16><25>ασ<02´>η, \\
503 <26`> <27`> <28\textasciitilde> <11>ινῶν <19>ιχ<10´>ων <15>ίη<17>ν, <22>ῖς β<01>λομένοις \\
504 \end{typeGreek}
505
506 Stigma-ligatures: (στι), or encode as (ςι), i.e. end-sigma and iota, and disambiguate later, or with proper stigma character U+03DB, i.e. (ϛι)? Probably (στι), because they will have to look it up anyway. Tilde or circumflex: The circumflex in the book looks like a modern tilde. Encode as circumflex anyway? (My Greek textbooks used the tilde, but it was called circumflex.)
507
508 \paragraph{Beta Code}
509
510 The same text in Beta Code, with additional < and > for ligatures (probably a bad choice as there are possible collisions with milestone tags, e.g. <ti/>), capital and small letters rather than capital letters with and without an asterisk, comma (rather than apostrophe as in an ancient version of Beta Code) to denote comma in the Greek text. Thesaurus Linguae Graecae (TLG) suggests s1 for mid-sigma and s2 for end-sigma, or alternatively always type s and use disambiguation rules. (German) wikipedia says: use s for mid-sigma and j for end-sigma, which I did.
511
512 \begin{typeBetacode}
513 Pa/pp<ou> t<ou=> Alecan<dr>e/wj S<un>a<gw><gh=>j \\
514 e(b<do><mo>n. \\
515 Pe<ri>e/x<ei> de\bs lh/mma<ta> t<ou=> a)naluo<me/n>ou <to/><po>u. \\
516 O kal<ou/><men>oj a)naluo/<men>oj, Ermo/dwre te/knon, \\
517 ka<ta\bs> su/<ll>h<yi>n i)<di/>a <ti/>j e)<sti>n u(lh <pa>re<sk><eu>as<me/n>h, \\
518 <meta\bs> <thn\bs> <tw=n> <ko>inw=n <sto>ix<ei/>wn <po>i/h<si>n, <to>i=j b<ou>lome/noij
519 \end{typeBetacode}
520
521 Converters between Beta Code and Unicode are readily available. I would find it more natural to type the diacritics before rather than after the character, just as á is typed as ´a, but it would probably make sense not to deviate from the standard (which may mean that capital A is typed as *a). SophoKeys (for Mac) generates Unicode from Beta Code on the fly while one is typing (and also accepts shift-a instead of *a).
522
523 For Beta Code: It may be easier to type if one is familiar with the Latin alphabet. Against Beta Code: If the typists use a keyboard with Greek letters, even if they don't know the Greek language, it may help that they can immediately compare the shape of the letter in the book and in the typed text (but see above). Problem: Most books will contain Greek as well as Latin characters, so they will probably use a standard keyboard.
524
525 The TLG uses Beta Code even though they have heard of Unicode 5.1, so they might have reasons? The main reason seems to be backwards compatibility, however, as they have used it since 1981. Is that an argument for us? Is compatibility with the Perseus project, which is mirrored at the MPIWG (seems to be down at the moment, though), desirable? (Even if the Chinese typists use Beta Code, our xml files will use Unicode, contrary to TLG and Perseus.)
526
527 By the way, the TLG's 2004 revision of Beta Code includes many unusual characters such as idiosyncratic abbreviations of ειναι (symbol \#1515), but no ligatures apart from ae etc.
528
529
530 Rule for vertical accents as in (τί)ς?
531
532 \section{More examples}
533
534 \subsection{Example 1}
535 \label{Structural markup general example}
536
537 Figures as <fig>. Paragraphs are tagged. Blank lines may or may not be inserted between paragraphs. Line breaks are typed, but without explicit tag. Centered text becomes left-aligned. Spaces between the letters of a word in caps are not typed. Rule for spaces before/after punctuation. Rule for font size? Word breaks as - regardless of their actual shape? \$ for long s. Diacritics. Ornamental letters as simple letters.
538
539 \begin{example} (Euclid latin, p.9)
540
541 \includegraphics[scale=0.55]{bsp_paragraph_euclidlat_9}
542
543 \begin{typeLatin}
544 <fig> \\
545 <h><fig>EVCLIDIS MEGA \\
546 RENSIS CLARISSIMI PHILOSOPHI, MATHEMA- \\
547 ticorum facilè principis, primùm ex Campano, deinde ex Theone Grӕco \\
548 cõmentatore, interprete Bartholomӕo Zamberto Veneto, \\
549 Geometricorũ elementorum Liber primus.</h> \\
550 <h>\_Ex Campano, triplex principiorum genus.\_</h> \\
551 <h>Primùm, Diffinitiones.</h> \\
552 <p>PUnctus e\$t, cuius pars non e\$t. 2 Linea, \\
553 e\$t Lõgitudo \$ine latitudine: 3 cuius quidẽ \\
554 ... </p>
555 \end{typeLatin}
556
557 \end{example}
558
559 insert </p><p> after the line in italics (Ex Campano etc.)? It's difficult to decide whether a new paragraph begins here or not.
560
561 (possible book-specific rule: space after sentence number)
562
563
564 \section{Book-specific Specifications}
565
566 \subsection{A Scheme for Book-specific Specifications}
567
568 \subsection{An Example}
569
570 \appendix
571 \section{List of all tags}
572
573 For example <pb>, <hd>. Indicate whether there is a corresponding closing tag or not.
574
575 \section{List of all trial-and-error issues}
576
577 For example: We will see what they will make out of marginal notes that consist of more than one paragraph.
578
579 \section{Fonts used}
580
581 Headings in Helvetica, normal text in DejaVu Serif (was Times), except small caps which are currently in Hoefler Text because I couldn't get XeTeX to produce small caps in any other font yet, latin and greek characters in normal text in DejaVu Serif, latin examples in DejaVu Sans Mono scale 0.9, greek examples in DejaVu Sans Mono scale 0.8.
582
583 \section{To do}
584
585 At the end: Make sure that example images and the corresponding text are on the same page. Introduce shorter tag names if there are no naming collisions.
586
587