Annotation of texttool-architecture/soft-search.tex, revision 1.1

1.1     ! bcfuchs     1: \subsubsection{reg.cgi (register text)}
        !             2: \label{sec:rec.cgi}
        !             3: 
        !             4: \paragraph{}
        !             5: On the ECHO server, the registration of new texts is implemented by
        !             6: means of a cgi script, reg.cgi
        !             7: (archimedes/web/cgi-bin/toc/admin/reg.cgi). reg.cgi retrieves a
        !             8: metadata file in MPIWG archive metadata format from the entered uri
        !             9: (currently only local paths are supported) and constructs from this
        !            10: file a toc.cgi object file (see below), which it writes to toc.cgi's
        !            11: data section. [corpus???] It should be stressed that this is a
        !            12: registration procedure developed for a particular implementation of
        !            13: toc.cgi and not a part of the core application. 
        !            14: 
        !            15: \paragraph{}
        !            16: reg.cgi takes two parameters, path and show.  Path should give the
        !            17: local path to the metadata file for the text that is being
        !            18: registered. If ``show'' is set to 1, reg.cgi will return for
        !            19: inspection the toc.cgi object file that it has built out of the
        !            20: submitted metadata file. 
        !            21: 
        !            22: \paragraph{input metadata file}
        !            23: 
        !            24: The input metadata file must have the following form:
        !            25: 
        !            26: \paragraph{}
        !            27: \begin{verbatim}
        !            28: <resource>
        !            29:     ...
        !            30:     <meta>
        !            31:       <meta>
        !            32:                 <bib type="Book">
        !            33: 
        !            34: <title>Mainzer Untergerichtsordnung (von 1534)</title>
        !            35: <author>anon</author>
        !            36: <year>1580</year>
        !            37:         <texttool><display>yes</display>
        !            38:    <image>pageimgtif</image>
        !            39:    <text>/mpiwg/online/experimental/echo_DRQEdit_test/anon_Mainz_1580/fulltextDW/mainzugo02_utf8.xml</text>
        !            40:    <pagebreak>pb</pagebreak><presentation>01-presentation/info.xml</presentation></texttool></meta>
        !            41: 
        !            42:     </meta>
        !            43: 
        !            44: \end{verbatim}
        !            45: \paragraph{archimedes object registration}
        !            46: 
        !            47: \subsubsection{toc.cgi (display text)}
        !            48: \label{sec:toc.cgi}
        !            49: 
        !            50: \paragraph{plan of this section }
        !            51: 
        !            52: \begin{enumerate}
        !            53: \item An overview of toc.cgi architecture
        !            54: \item A walk-through of typical cgi queries for toc.cgi
        !            55: \item An index of cgi parameters and values with short descriptions of function
        !            56: \end{enumerate}
        !            57: 
        !            58: \paragraph{Overview of toc.cgi architecture}
        !            59: 
        !            60: \subparagraph{}
        !            61: toc.cgi is a perl script for displaying collections of xml texts and 
        !            62: linking them to related resources such as page-images, morphological
        !            63: analysis, commentaries, dictionaries, etc. It implements generic methods
        !            64: for resource-linking provided by a series of perl modules which are in
        !            65: turn based mainly on generic open-source tools for xml manipulation and networking
        !            66: written in C. 
        !            67: 
        !            68: \subparagraph{toc.cgi collections--Network transparency}
        !            69: Each of the collections in toc.cgi is a ``virtual'' collection, that
        !            70: is, a collection of links or uri's to resources that reside somewhere on an accessible
        !            71: network, local or remote.  
        !            72: 
        !            73: \subparagraph{toc.cgi collections--remote resources}
        !            74: 
        !            75: What is at the other end of the link is of no concern to toc.cgi, as
        !            76: long as the resource referenced by the link meets minimal toc.cgi
        !            77: requirements--how the resource is actually implemented and exposed is
        !            78: a matter for the resource provider. The link may, for instance, point
        !            79: directly to an xml text or it may point to a container which exposes a
        !            80: particular xml view of an underlying resource that is perhaps not in
        !            81: xml format at all. 
        !            82: 
        !            83: 
        !            84: \subparagraph{resource registry}
        !            85: 
        !            86: 
        !            87: 
        !            88: 
        !            89: \paragraph{cgi parameters -- standard queries}
        !            90: 
        !            91: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=corpus }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=corpus }
        !            92: \newline
        !            93: \newline
        !            94: get a listing of corpora
        !            95: 
        !            96: 
        !            97: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest }
        !            98: \newline
        !            99: \newline
        !           100: get an xml listing of corpora 
        !           101: 
        !           102: 
        !           103: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi }
        !           104: \newline
        !           105: \newline
        !           106: get a listing of works in default corpus
        !           107: 
        !           108: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?corpus=1 }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?corpus=1 }
        !           109: \newline
        !           110: \newline
        !           111: get a listing of works in corpus 1 [default corpus = 0]
        !           112: 
        !           113: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpuslist }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpuslist }
        !           114: \newline
        !           115: \newline
        !           116: get an xml listing of works in default corpus 
        !           117: 
        !           118: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpuslist;corpus=1 }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpuslist;corpus=1 }
        !           119: \newline
        !           120: \newline
        !           121: get an xml listing of works in corpus 1
        !           122: 
        !           123: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?dir=baifl\_renav\_006\_la\_1537;step=thumb }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?dir=baifl_renav_006_la_1537;step=thumb }
        !           124: \newline
        !           125: \newline
        !           126: get a work from default corpus with thumbnail navbar displayed left
        !           127: 
        !           128: 
        !           129: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?dir=jorda\_ponde\_050\_la\_1533;step=thumb;ftype=thumbright }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?dir=jorda_ponde_050_la_1533;step=thumb;ftype=thumbright }
        !           130: \newline
        !           131: \newline
        !           132: get a work from default corpus with thumbnail navbar displayed right
        !           133: 
        !           134: \htmladdnormallink{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?dir=jorda\_ponde\_050\_la\_1533;step=textonly;corpus=;page=22 }{ http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?dir=jorda_ponde_050_la_1533;step=textonly;corpus=;page=22 }
        !           135: \newline
        !           136: \newline
        !           137: get a page of text from a work from default corpus 
        !           138: 
        !           139: 
        !           140: 
        !           141: 
        !           142: \subsubsection{Indexing}
        !           143: \label{sec:indexing}
        !           144: 
        !           145: \paragraph{Status quo ECHO}
        !           146: Currently indexing is not implemented on the ECHO server.
        !           147: 
        !           148: \paragraph{Plan ECHO}
        !           149: 
        !           150: \begin{enumerate}
        !           151: \item construct remote (141.14.236.86) index for each file at
        !           152:   per-change or daily intervals
        !           153: \item store indices locally in
        !           154: archimedes/data/db/PROJECT\_NAME/CORPUS\_NAME/WORK
        !           155: \item 2 progs on server 1. cgi: indexer 2. backend da\_remote
        !           156: \item 2 progs on client 1. cgi: sendindex 2. backend getindex
        !           157: \item indexing transaction handled by two cgi scripts, one on the
        !           158:   server the other on the client [this is the 1st implementation because
        !           159:   it's easiest and there are no port issues, but probably it'd be
        !           160:   better to have a separate port]. 
        !           161: \item client cgi: getindex -- sends 1.  list of files to index
        !           162:   2. uri to which xml notification of completion is to be sent. Upon
        !           163:   notification, activates backend prog that fetches and installs the
        !           164:   indices.  
        !           165: \item server cgi: indexer receives filelist and notification
        !           166:   address. Activates backend that fetches files, indexes, places
        !           167:   completed indexes in a networked location, then sends xml
        !           168:   notification back to client. 
        !           169: \item single script provides backend access to indices 
        !           170: \item leave front-end issues like display, collection and navigation
        !           171:   to web-design programmers. Do only a  sample for now. 
        !           172: \end{enumerate}
        !           173: 
        !           174: \subsubsection{Morphology}
        !           175: \label{sec:morphology}
        !           176: 
        !           177: 
        !           178: \subsubsection{Dictionary server}
        !           179: \label{sec:dictionary-server}
        !           180: 
        !           181: 
        !           182: \subsubsection{helper programs}
        !           183: 
        !           184: \paragraph{addarch.pl ARCHIMEDES} 
        !           185: 
        !           186: Automatically registers new texts as toc.cgi objects when they appear in
        !           187: cvs. Automatically updates relevant morphological indices (slow!) each
        !           188: time a cvs update occurs. This program is called by a hook in the cvs
        !           189: ``loginfo'' configuration file. 
        !           190: 
        !           191: 
        !           192: \paragraph{makelemma.pl ARCHIMEDES}
        !           193: 
        !           194: Updates lemmatization indices. 
        !           195: Parameters: 
        !           196: No parameter--update all lemmatization indices
        !           197: [latin | ital | greek | en | nl | de]--  update this language
        !           198: 
        !           199: \paragraph{makefast.pl ARCHIMEDES} 
        !           200: 
        !           201: Updates the toc.cgi morphology indices.
        !           202: Parameters:
        !           203: No parameter--update all lemmatization indices
        !           204: [latin | ital | greek | en | nl | de]--  update this language
        !           205: 
        !           206: \subsubsection{summary of differences between the archimedes toc.cgi
        !           207:   implementation and the echo toc.cgi implementation (toc.x.cgi)}
        !           208: 
        !           209: \paragraph{missing in archimedes}
        !           210: \begin{enumerate}
        !           211: 
        !           212: \item html templates (coded but phased out of cvs branch)
        !           213: \end{enumerate}
        !           214: 
        !           215: \paragraph{missing in echo}
        !           216: \begin{enumerate}
        !           217: 
        !           218: \item word-coloring?
        !           219: \item remote text method may work differently
        !           220: \end{enumerate}
        !           221: 
        !           222: \paragraph{differences}
        !           223: \begin{enumerate}
        !           224: \item structure of info.xml
        !           225: \item resource-discovery algorithm for info.xml
        !           226: \end{enumerate}
        !           227: 
        !           228: 
        !           229: 
        !           230: %%% Local Variables: 
        !           231: %%% mode: latex
        !           232: %%% TeX-master: "texttools"
        !           233: %%% End: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>