| 56 | |
| 57 | === Emendations of the raw text === |
| 58 | |
| 59 | {{{ |
| 60 | # normalize the zero in modern page numbers |
| 61 | s!○!〇!g; # white circle U+25CB --> ideographic number zero U+3007 |
| 62 | |
| 63 | # ignore outdentation in the preface |
| 64 | if ($line < 153) { s!<p x>!<p>!; } |
| 65 | |
| 66 | # pre-process notes that continue on the next line (no rules defined yet) |
| 67 | |
| 68 | # fill in the unknown characters (MSi) |
| 69 | s!<001>!轂!g; # s!<001>!<unknown code="001" unicode="8F42">轂</unknown>!g; |
| 70 | s!<002>!<unknown code="002" unicode="2F88D">庶</unknown>!g; # the actual Unicode character (U+2F88D) breaks oXygen |
| 71 | |
| 72 | # clarify <?> (the list is not complete!); NB: an ASCII "?" is a regex quantifier and must be escaped inside a pattern |
| 73 | s!<?>!<?>!; # line 811: fullwidth question mark U+FF1F --> ASCII question mark U+003F (the pattern must keep the fullwidth character) |
| 74 | s!愈<\?>!愈!g; # MSi: the reading is correct |
| 75 | s!丙、等。<\?>而戊丙丁、與甲乙丙、又等。!丙、等。而戊丙丁、與甲乙丙、又等。!; # line 1041 |
| 76 | # (line 1041: MSi: It is in the middle of a sentence, but a period at this position is quite common nonetheless.) |
| 77 | |
| 78 | # missing line breaks (the list is not complete!) |
| 79 | s!小於兩直角。則此二橫直線。!小於兩直角。則此二橫<lb/>直線。!; # line 403; may have to do with the neighboring figure |
| 80 | s!俱小於直角。或幷之小於兩直角。!俱小於直角。或幷之小<lb/>於兩直角。!; # line 404 |
| 81 | |
| 82 | # normalize the hash in the table |
| 83 | s!#!#!g; # fullwidth number sign U+FF03 --> ASCII hash, i.e. number sign U+0023 (the pattern must keep the fullwidth character) |
| 84 | |
| 85 | # move the only table in the text (ECHO p.327) out of its surrounding sentence |
| 86 | s!却云十六與十二之比例。若!却云十六與十二之比例。!; # line 4562 |
| 87 | s!八與三、及二與四之比例。!若<lb/>八與三、及二與四之比例。!; # line 4573 |
| 88 | |
| 89 | # misc. emendations |
| 90 | s!N12<114608657010!N12x114608657010!; # line 5: replace "<" in library stamp junk |
| 91 | s!<pb 六><h>幾何原本 卷一之首</h>!<pb 六><rh>幾何原本 卷一之首</rh>!; # line 245 (obvious mistake) |
| 92 | s!<h>後支前己正論</h>!<p>後支前己正論</p>!; # line 2175 (Tian Miao: wrong tag) |
| 93 | if ($line == 2992) { s!<h>第三十四題</h>!<h>第十四題</h>!; } # line 2992 (obvious mistake) |
| 94 | }}} |