Annotation of storage/meta/meta-format.tex, revision 1.12

1.1       casties     1: \documentclass[a4paper]{article}
                      2: 
                      3: \usepackage[latin1]{inputenc}
                      4: \usepackage[T1]{fontenc}
                      5: \usepackage{ae}
                      6: %\usepackage{times}
                      7: %\usepackage{courier}
                      8: 
                      9: % create in-text links black (with PDF)
1.6       casties    10: \usepackage[colorlinks=true,linkcolor=black]{hyperref}
1.1       casties    11: % Format URLs nicely (without PDF)
1.6       casties    12: %\usepackage{url}
1.1       casties    13: 
                     14: 
                     15: \title{A simple metadata format for resource bundles}
                     16: 
1.4       casties    17: \author{Robert Casties, Dirk Wintergrün, Hans-Christoph Liess}
1.1       casties    18: 
1.12    ! casties    19: \date{V1.1.0 of 5.12.2003}
1.1       casties    20: 
                     21: \begin{document}
                     22: 
                     23: \maketitle
                     24: 
                     25: \tableofcontents
                     26: 
                     27: 
                     28: \section{File and directory names}
                     29: \label{sec:file-directory-names}
                     30: 
                     31: File and directory names should not contain spaces. Allowed characters
                     32: in filenames are only the alphanumeric set a-z, A-Z, 0-9, hyphen
                     33: ``-'', underscore ``\_'' and dot ``.''.
                     34: 
1.12    ! casties    35: Files and directories with names that contain illegal characters must
        !            36: be transformed to allowed names. A proposition for a simple
        !            37: transformation rule is
        !            38: 
        !            39: \begin{itemize}
        !            40: \item whitespace characters (e.g. blank, tab, cr, lf) are replaced by
        !            41:   hyphens ``-''
        !            42: 
        !            43: \item other illegal characters are replaced by underscores ``\_''.
        !            44: \end{itemize}
        !            45: 
        !            46: This rule does not provide a reversible mapping to the original
        !            47: illegal file name and it does not provide a collision-free mapping,
        !            48: i.e. two different illegal file names might be mapped to the same
        !            49: allowed file name. Additional precautions for these cases must be
        !            50: taken.
1.1       casties    51: 
1.4       casties    52: 
                     53: \section{Metadata files}
                     54: \label{sec:metadata-files}
                     55: 
                     56: The metadata information is stored in the XML format documented below
                     57: in special files in the resource directory. Two forms of metadata
                     58: files are possible:
                     59: \begin{itemize}
                     60: \item a file named \texttt{index.meta} in a directory.
                     61: 
                     62: \item a file named like the data file it describes with an
                     63:   additional extension \texttt{.meta}. For example metadata for the
                     64:   file \texttt{0001.tif} would be in a file \texttt{0001.tif.meta}.
                     65: \end{itemize}
                     66: 
                     67: The resource directory must contain an \texttt{index.meta} file with
                     68: information about the resource as a whole. Other directories can
                     69: contain \texttt{index.meta} files.
                     70: 
                     71: Additional information about single data files that are part of the
                     72: resource can either be put in \texttt{file} tags in the
                     73: \texttt{index.meta} file or in separate \emph{filename}\texttt{.meta}
                     74: files for each data file. Information from the directory level file is
                     75: inherited at the file level.
                     76: 
                     77: 
1.1       casties    78: \section{Resource format}
                     79: \label{sec:mpiwg-doc}
                     80: 
                     81: In this description elements marked ``optional'' need not be supplied
                     82: by the provider of the resource and may be absent in all versions of
                     83: the metadata file. Elements marked ``required'' must be supplied by
                     84: the provider of the resource. Elements marked ``deduced'' can be
                     85: supplied by the provider of the resource but can also be provided by
1.4       casties    86: automatic scripts later in the process; these elements must be present
1.1       casties    87: in the final file.
                     88: 
1.12    ! casties    89: File and directory paths in the metadata file use the conventional
        !            90: Unix file separator slash ``/''.
        !            91: 
1.11      casties    92: The outer container element is \texttt{resource}. It has the following
                     93: \textbf{attributes}:
                     94: 
                     95: \begin{description}
1.12    ! casties    96: \item[type] sub-type of resource (e.g. ``ECHO'', ``MPIWG'') --
        !            97:   optional.
1.11      casties    98:   
1.12    ! casties    99: \item[version] version number of metadata format (currently 1.1) --
1.11      casties   100:   required.
                    101: \end{description}
                    102: 
                    103: \noindent The allowed \textbf{elements} inside \texttt{resource} are:
1.1       casties   104: 
                    105: \begin{description}
                    106: \item[description] An informal textual description of the
                    107:   resource -- optional.
                    108: 
                    109: \item[name] The filename of the resource (name of the directory this
                    110:   file is contained in) -- required.
                    111: 
                    112: \item[creator] The name of the project or person that created the
                    113:   resource -- optional.
1.4       casties   114:   
                    115: \item[archive-creation-date] The time and date the archive collection
                    116:   was created -- deduced.
1.1       casties   117: 
1.4       casties   118: \item[archive-storage-date] The time and date the archive was written
                    119:   to permanent storage -- deduced (must not be set by the user).
1.1       casties   120: 
                    121: \item[archive-path] The full path to the resource directory inside the
1.5       casties   122:   whole archive collection, including the resource directory -- deduced.
1.12    ! casties   123: 
        !           124: \item[archive-id] The ID for this document in the archive --
        !           125:   required.
1.1       casties   126:   
                    127: \item[derived-from] Container for the description of the original
                    128:   resource if this resource is a modified version of another resource
                    129:   -- optional.
                    130: 
                    131:   \begin{description}
1.12    ! casties   132:   \item[archive-id] The ID of the original resource
        !           133:     -- required.
        !           134: 
1.1       casties   135:   \item[archive-path] The full path to the original resource
1.12    ! casties   136:     -- deduced.
1.1       casties   137: 
                    138:   \item[description] An informal textual description of the relation
                    139:   of this resource to the original resource -- optional.
                    140:   \end{description}
                    141:   
                    142: \item[linked-with] Container for the description of another
                    143:   resource when this resource is a linked copy of another resource
                    144:   -- optional.
                    145: 
                    146:   \begin{description}
1.12    ! casties   147:   \item[archive-id] The ID of the linked resource
        !           148:     -- required.
        !           149: 
1.1       casties   150:   \item[archive-path] The full path to the linked resource
1.12    ! casties   151:     -- deduced.
1.1       casties   152: 
                    153:   \item[description] An informal textual description of the relation
                    154:   of this resource to the linked resource -- optional.
                    155:   \end{description}
                    156:   
1.12    ! casties   157: \item[media-type] \label{tag-media-type} The main media type of this
        !           158:   resource -- required.\\ The main media type can be overridden by
        !           159:   \texttt{media-type}s in subdirectories. Possible types are
        !           160:   \begin{itemize}
        !           161:   \item \texttt{image}
        !           162: 
        !           163:   \item \texttt{text}
        !           164: 
        !           165:   \item \texttt{audio}
        !           166: 
        !           167:   \item \texttt{video}
        !           168: 
        !           169:   \item \texttt{data} for other types of data
        !           170:   \end{itemize}
1.1       casties   171:   
                    172: \item[meta] Additional metadata information about the resource --
                    173:   optional.\\ For a description of additional metadata see below.
                    174: 
                    175: \item[dir] Container for the description of a subdirectory -- required
                    176:   (when there are subdirectories).\\ \texttt{dir} tags should not be
                    177:   nested. Directories at lower levels are identified by their
                    178:   \texttt{path}.
                    179: 
                    180:   \begin{description}
                    181:   \item[description] An informal textual description of the
                    182:     subdirectory -- optional.
                    183: 
                    184:   \item[name] The name of the subdirectory -- required.
                    185:     
1.12    ! casties   186:   \item[original-name] A text string associated with the directory as
        !           187:     original name -- optional. (E.g. if the data in this directory
        !           188:     came from an external source and had a name that had to be changed
        !           189:     according to section~\ref{sec:file-directory-names} but it should
        !           190:     be possible to reference the original name.)
        !           191:     
1.1       casties   192:   \item[path] The directory path of this subdirectory relative to the
1.5       casties   193:     resource's root directory (excluding the directory itself) --
                    194:     required (may be empty or omitted if the directory is a direct
                    195:     child of the resource's root directory).
1.1       casties   196:     
                    197:   \item[meta] Additional metadata information about the directory --
                    198:     optional.\\ For a description of additional metadata see below.
                    199:   \end{description}
                    200:   
                    201: \item[file] Container for the description of a file -- deduced.\\
                    202:   \texttt{file} tags should not be nested in \texttt{dir} tags. Files
                    203:   at lower directory levels are identified by their \texttt{path}.
                    204: 
                    205:   \begin{description}
                    206:   \item[description] An informal textual description of the
                    207:     file -- optional.
                    208: 
                    209:   \item[name] The name of the file -- required.
                    210:     
1.12    ! casties   211:   \item[original-name] A text string associated with the file as
        !           212:     original name -- optional. (E.g. if this file came from an
        !           213:     external source and had a name that had to be changed according to
        !           214:     section~\ref{sec:file-directory-names} but it should be possible
        !           215:     to reference the original name.)
        !           216:     
1.1       casties   217:   \item[path] The directory path of this file relative to the
1.5       casties   218:     resource's root directory (excluding the file itself) -- required
                    219:     (may be empty or omitted if the file is in the resource's root
                    220:     directory).
1.7       casties   221:     
                    222:   \item[date] The file's modification or creation date\footnote{The
                    223:       preferred time and date format is ``YYYY/MM/DD HH:MM:SS''},
                    224:     whichever is more recent -- optional.
1.1       casties   225: 
                    226:   \item[modification-date] The file's modification date -- optional.
                    227: 
                    228:   \item[creation-date] The file's creation date -- optional.
1.7       casties   229:     
1.1       casties   230:   \item[size] The file size -- deduced.
                    231:     
                    232:   \item[mime-type] The file's mime-type -- optional.
                    233: 
                    234:   \item[md5cs] MD5 checksum of the file content -- optional.
                    235:     
                    236:   \item[meta] Additional metadata information about the file --
                    237:     optional. For a description of additional metadata see below.
                    238:   \end{description}
                    239:   
                    240: \end{description}
                    241: 
                    242: 
                    243: 
                    244: \section{Additional metadata}
                    245: \label{sec:additional-metadata}
                    246: 
                    247: All elements with \texttt{meta} tags can contain an arbitrary number
1.12    ! casties   248: of the following additional metadata elements.
        !           249: 
        !           250: \subsection{workflow state}
        !           251: \label{sec:workflow-state}
        !           252: 
        !           253: All additional metadata elements can have a \texttt{workflow-state}
        !           254: \textbf{attribute}. This attribute reflects the state of the
        !           255: corresponding metadata element. The possible values for the
        !           256: \texttt{workflow-state} attribute are
        !           257: \begin{itemize}
        !           258: \item \texttt{preliminary} -- this information is preliminary. It must
        !           259:   be checked in further workflow steps.
        !           260: 
        !           261: \item \texttt{inwork}
        !           262: 
        !           263: \item \texttt{final}
        !           264: \end{itemize}
        !           265: 
        !           266: Workflow states other than \texttt{preliminary} are part of the
        !           267: workflow handling of the respective projects.
        !           268: 
        !           269: Metadata elements can appear multiple times with different
        !           270: \texttt{workflow-state} attributes. This enables metadata versioning.
        !           271: 
        !           272: 
        !           273: 
        !           274: \subsection{Content type}
        !           275: \label{sec:content-type}
        !           276: 
        !           277: \begin{description}
        !           278: \item[content-type] \label{tag-content-type} The content type of this
        !           279:   resource -- required.\\
        !           280:   The content type enables the choice of tools to manipulate and
        !           281:   display the resource. There should be a common list of content
        !           282:   types. For digital documents (books, manuscripts) this would be
        !           283:   ``scanned document'', for other image data ``scanned
        !           284:   images''.\footnote{The criterion for documents is an ordered
        !           285:     succession of image files (pages) and equal image size and
        !           286:     resolution throughout the images of a resource.}
        !           287: \end{description}  
        !           288: 
        !           289: 
1.1       casties   290: 
1.4       casties   291: \subsection{Language}
                    292: \label{sec:lang}
                    293: 
                    294: The language of a resource (e.g. a text) can be specified with a
                    295: \texttt{lang} tag. Languages have to be described using the
                    296: international codes for the representation of names of languages
                    297: either in two-letter form (ISO 639-1) or in three-letter form (ISO
                    298: 639-2).  The entire catalogue of languages is documented on the page
                    299: 
                    300: \url{http://www.loc.gov/standards/iso639-2/englangn.html}
                    301: 
1.1       casties   302: 
                    303: \subsection{DRI}
                    304: \label{sec:dri}
                    305: 
                    306: The \emph{digital resource identifier} for the resource is specified
1.4       casties   307: in a \texttt{dri} element. Digital resource identifiers are documented
1.1       casties   308: on the page
                    309: 
                    310: \url{http://pythia.mpiwg-berlin.mpg.de/projects/standards/dri}.
                    311: 
                    312: 
1.4       casties   313: 
                    314: \subsection{Collection context}
                    315: \label{sec:collection-context}
                    316: 
                    317: The context of a resource as part of a collection or part of a project can be
1.5       casties   318: specified in the \texttt{context} element. All elements in the
                    319: container can appear multiple times.
1.4       casties   320: 
                    321: \begin{description}
1.5       casties   322: \item[context] information on collection or project context.
1.4       casties   323: 
1.5       casties   324:   \begin{description}
                    325:   \item[link] URL to additional context information.
                    326:     
                    327:   \item[name] Textual description of project or collection.
                    328:   \end{description}
1.4       casties   329: \end{description}
1.5       casties   330: 
1.4       casties   331: 
                    332: 
                    333: 
1.1       casties   334: \subsection{Bibliographic information}
                    335: \label{sec:bibliographic-data}
                    336: 
1.5       casties   337: Bibliographic information is presented in a \texttt{bib} container with
1.1       casties   338: a \texttt{type} parameter, giving the type of bibliographic resource.
1.4       casties   339: The \texttt{type} field can be repeated as a tag in the container.
                    340: 
1.5       casties   341: The format is based on the ECHO scheme for bibliographic data (cf.
                    342: content workflow), the MPIWG ``Projektbibliografie'' and the format of
                    343: the commonly used program ``EndNote''.
                    344: 
1.4       casties   345: 
                    346: \subsubsection{Book}
                    347: 
                    348: \begin{description}
                    349: 
                    350: \item [bib type="book"] a published book.
                    351: 
                    352:   \begin{description}
                    353:   \item [author] The author of the book.
                    354:   \item [year] The year of publication.
                    355:   \item [title] Title of the book.
                    356:   \item [series-editor] Name of the series editor, if the book appears
                    357:     in a series.
                     358:   \item [series-title] Title of the series, if the book appears in a
                    359:     series.
                    360:   \item [series-volume] Volume number, if the book appears in a
                    361:     series.
                    362:   \item [number-of-pages] Number of pages of the entire book.
                    363:   \item [city] City where the book was published.
                    364:   \item [publisher] Name of the publishing company
                    365:   \item [edition] Edition of the book (e.g. third edition)
                     366:   \item [number-of-volumes] Number of volumes, if the book is
                    367:     published in multiple volumes.
                    368:   \item [translator] Name of the translator.
                    369:   \item [isbn-issn]
                    370:   \end{description}
                    371: \end{description}
                    372: 
                    373: \subsubsection{In Book}
                    374: 
                    375: \begin{description}
                    376: \item [bib type="inbook"] an article as part of a book.
                    377: 
                    378:   \begin{description}
                     379:   \item [author] The author of the article.
                    380:   \item [year] The year of publication.
                    381:   \item [title] Title of the article.
                    382:   \item [editor] Name of the book's editor.
                    383:   \item [book-title] Title of the book.
                    384:   \item [series-volume] Volume number, if the book appears in a
                    385:     series.
                    386:   \item [pages] Number of pages of the article.
                    387:   \item [city] City where the book was published.
                    388:   \item [publisher] Name of the publishing company
                    389:   \item [edition] Edition of the book (e. g. third edition)
                    390:   \item [series-author] Name of the series editor, if the book appears
                    391:     in a series.
                    392:   \item [series-title] Title of the series, if the book appears in a
                    393:     series.
                     394:   \item [number-of-volumes] Number of volumes, if the book is
                    395:     published in multiple volumes.
                    396:   \item [translator] Name of the translator
                    397:   \item [isbn-issn]
                    398:   \end{description}
                    399: \end{description}
                    400: 
                    401: \subsubsection{Proceedings}
                    402: 
                    403: \begin{description}
                    404: \item [bib type="proceedings"] a conference proceedings publication.
                    405: 
                    406:   \begin{description}
                    407:   \item [author] The author of the article.
                    408:   \item [year] The year of publication.
                    409:   \item [title] Title of the article.
                    410:   \item [editor] Name of the book's editor.
                    411:   \item [conference-name] Name of the conference the proceedings are
                    412:     related to.
                    413:   \item [volume] Volume number.
                    414:   \item [pages] Number of pages of the article.
                    415:   \item [date] Date of the conference the proceedings are related to.
                     416:   \item [conference-location] City where the conference was held.
                    417:   \item [publisher] Name of the publishing company
                    418:   \item [edition] Edition of the book (e. g. third edition)
                    419:   \item [series-editor] Name of the series editor, if the book appears
                    420:     in a series.
                    421:   \item [series-title] Title of the series, if the book appears in a
                    422:     series.
                     423:   \item [number-of-volumes] Number of volumes, if the book is
                    424:     published as multiple volumes.
                    425:   \item [isbn-issn]
                    426:   \end{description}
                    427: \end{description}
                    428: 
                    429: \subsubsection{Edited Book}
                    430: 
                    431: \begin{description}
                    432: \item[bib type="edited-book"] a book that is the edition of another
                    433:   work.
                    434: 
                    435:   \begin{description}
                    436:   \item [editor] Name of the editor of the book.
                    437:   \item [year] The year of publication.
                    438:   \item [title] Title of the book.
                    439:   \item [series-editor] Name of the editor of the series the book is
                    440:     part of.
                    441:   \item [series-title] Title of the series, if the book is part of a
                    442:     series.
                    443:   \item [series-volume] Volume number, if the book appears in a series.
                     444:   \item [number-of-pages] Number of pages of the book.
                    445:   \item [city] City where the book was published.
                    446:   \item [publisher] Name of the publishing company
                    447:   \item [edition] Information about the edition (e.g. ``Repr. of the London ed. 1652'')
                     448:   \item [number-of-volumes] Number of volumes, if the book is
                    449:     published as multiple volumes.
                    450:   \item [isbn-issn]
                    451:   \end{description}
                    452: \end{description}
                    453: 
                    454: \subsubsection{Journal Article}
                    455: 
                    456: \begin{description}
                    457: \item [bib type="journal-article"] an article in a scientific journal.
                    458:   \begin{description}
                    459:   \item [author] The author of the article.
                    460:   \item [year] The year of publication.
                    461:   \item [title] Title of the article.
                    462:   \item [journal] Name of the journal.
                    463:   \item [volume] Volume number, if the journal appears in a series.
                    464:   \item [issue] Number of the issue the article is part of.
                    465:   \item [pages] Number of pages of the article.
                    466:   \item [alternate-journal] Alternate Journal
                    467:   \item [isbn-issn]
                    468:   \end{description}
                    469: \end{description}
                    470: 
                    471: \subsubsection{Magazine Article}
                    472: 
                    473: \begin{description}
                    474: \item [bib type="magazine-article"] an article in a popular magazine.
                    475:   \begin{description}
                     476:   \item [author] The author of the article.
                    477:   \item [year] The year of publication.
                    478:   \item [title] Title of the article.
                    479:   \item [magazine] Name of the magazine.
                     480:   \item [volume] Volume number, if the magazine appears in a series.
                    481:   \item [issue-number] Number of the issue the article is part of.
                     482:   \item [pages] Number of pages of the article.
                    483:   \item [date] Date when the article appeared.
                    484:   \end{description}
                    485: \end{description}
                    486: 
                    487: \subsubsection{Newspaper Article}
                    488: 
                    489: \begin{description}
                    490: \item [bib type="newspaper-article"] an article in a newspaper.
                    491:   \begin{description}
                    492:   \item [author] The author of the article.
                    493:   \item [year] The year of publication.
                    494:   \item [title] Title of the article.
                     495:   \item [newspaper] Name of the newspaper the article appeared in.
                    496:   \item [pages] Number of pages of the article.
                    497:   \item [issue-date] Date of the issue the article is part of.
                    498:   \item [city] City of the newspaper.
                    499:   \end{description}
                    500: \end{description}
                    501: 
                    502: \subsubsection{Thesis}
                    503: 
                    504: \begin{description}
                    505: \item [bib type="thesis"] a master/doctorate/etc. thesis.
                    506:   \begin{description}
                    507:   \item [author] The author of the thesis.
                    508:   \item [year] The year of publication.
                    509:   \item [title] Title of the thesis.
                    510:   \item [academic-department] Name of the academic department where
                    511:     the thesis was handed in.
                    512:   \item [number-of-pages] Number of pages of the thesis.
                    513:   \item [city] City where the thesis was published.
                     514:   \item [university] Name of the university where the thesis was
                    515:     handed in.
                    516:   \item [isbn-issn]
                    517:   \end{description}
                    518: \end{description}
                    519: 
                    520: \subsubsection{Report}
                    521: 
                    522: \begin{description}
                    523: \item [bib type="report"] a scientific report.
                    524:   \begin{description}
                    525:   \item [author] The author of the report.
                    526:   \item [year] The year of publication.
                    527:   \item [title] Title of the report.
                    528:   \item [pages] Number of pages of the report.
                    529:   \item [date] Date when the report appeared.
                     530:   \item [city] City where the report was published.
                    531:   \item [institution] Institution where the report was produced.
                    532:   \item [type] Type of report.
                    533:   \item [report-number] Report number.
                    534:   \end{description}
                    535: \end{description}
                    536: 
1.5       casties   537: \subsubsection{Manuscript}
                    538: 
                    539: \begin{description}
                    540: \item [bib type="manuscript"] a handwritten/typewritten manuscript.
                    541: 
                    542:   \begin{description}
                    543:   \item [title] Title of the manuscript.
                    544:   \item [author] The author of the text.
                    545:   \item [location] Name of the library where the manuscript is
                    546:     currently located.
                    547:   \item [year] The year or century of publication.
                    548:   \item [pages] Number of pages of the manuscript.
                    549:   \item [signature] Signature of the manuscript.
                    550:   \item [editorial-remarks] Remarks related to the online
                    551:     publication of the manuscript. This could be notes about
                    552:     annotations etc.
                    553:   \item [description] This can be any kind of description.
                    554:   \item [keywords] Keywords related to the manuscript.
                    555:   \end{description}
                    556: \end{description}
                    557: 
                    558: 
1.4       casties   559: \subsubsection{Generic}
                    560: 
                    561: \begin{description}
                    562: \item [bib type="generic"] a generic bibliographic type. This type
                    563:   should only be used in rare cases.
                    564:   \begin{description}
                    565:   \item [author]
                    566:   \item [year]
                    567:   \item [title]
                    568:   \item [secondary-author]
                    569:   \item [secondary-title]
                    570:   \item [volume]
                    571:   \item [number]
                    572:   \item [pages]
                    573:   \item [date]
                    574:   \item [place-published]
                    575:   \item [publisher]
                    576:   \item [edition]
                    577:   \item [tertiary-author]
                    578:   \item [tertiary-title]
                    579:   \item [number-of-volumes]
                    580:   \item [type-of-work]
                    581:   \item [subsidiary-author]
                    582:   \item [alternate-title]
                    583:   \item [isbn-issn]
                    584:   \item [call-number]
                    585:   \item [label]
                    586:   \item [keywords]
                    587:   \item [abstract]
                    588:   \item [notes]
                    589:   \item [url]
1.5       casties   590:   \end{description}
1.4       casties   591: \end{description}
                    592: 
                    593: 
                    594: \subsection{Architectural drawings}
                    595: \label{sec:doc}
                    596: 
                    597: Specific information for architectural drawings is presented in a
1.5       casties   598: \texttt{doc} container with an additional \texttt{type} attribute
                    599: giving the type of drawing. All elements inside the container can
                    600: appear multiple times.
1.4       casties   601: 
                    602: \begin{description}
1.5       casties   603: 
                    604: \item[doc type="Architectural Drawing"] architectural drawing.
                    605: 
                    606:   \begin{description}
                    607:   \item [person] last name and first name of a person, separated by a
                    608:     comma. A further common name for the person can be put in front,
                    609:     separated by a semicolon.
                    610:   \item [location] Name of a place in its common notation. This can be
                    611:     a city or a institution.
                    612:   \item [date] This can be a year (or several years, separated by
                    613:     commas) or a period (1706-1714). Years are noted with four digits.
                    614:   \item [object] Short description of an object or signatures.
                    615:   \item [keywords] Keywords related to the object.
                    616: \end{description}
1.4       casties   617: \end{description}
1.1       casties   618: 
                    619: 
1.10      casties   620: \subsection{Document structure (table of contents)}
1.1       casties   621: \label{sec:toc}
                    622: 
1.4       casties   623: Information on the structure of a document like the division into
                    624: parts and chapters in the way of a table of contents is presented in a
                    625: \texttt{toc} container. 
                    626: 
                    627: The scheme allows multiple logical pages on a single page image,
                    628: as is often the case with scanned books or manuscripts. The scheme
                    629: also allows for ``loose'' numbering schemes with roman, arabic or
                    630: other page numbers consecutively or mixed and changes in the numbering
                    631: within the document.
                    632: 
                    633: The flexibility comes from the fact that no additional assumptions
                    634: about the mapping between logical pages and page images are made in
                    635: the format. All mapping information is specified by the user.
                    636: 
                    637: The logical page numbering or naming that can be presented to the user
                    638: is specified in the \texttt{name} tags while the physical numbering of
                    639: the page images is specified in the \texttt{index} or \texttt{url}
                    640: tags.
1.1       casties   641: 
1.4       casties   642: \begin{description}
1.5       casties   643: \item[toc] container for document structure
                    644: 
1.4       casties   645:   \begin{description}
1.5       casties   646:   \item[page] describes a single logical page
                    647: 
                    648:     \begin{description}
                    649:     \item[name] the ``name'' of the logical page. This can be any string
                    650:       like a page number (arabic, roman, etc.) or a special designation
                    651:       like ``Table 5''.
                    652:       
                    653:     \item[index] the \texttt{digilib} index number\footnote{The index
                    654:         number for digilib is the index in the alphabetical order of the
                    655:         scan file names.} of the scan image of the page.
                    656:       
                    657:     \item[url] as an alternative to the \texttt{digilib} index number, the
                    658:       full URL of the scan image of the page can be used.
                    659:     \end{description}
1.4       casties   660:     
1.5       casties   661:   \item[chapter] describes a section or chapter of the text.
                    662:     \texttt{chapter} elements can be nested.
1.1       casties   663: 
1.4       casties   664:     \begin{description}
1.5       casties   665:     \item[name] the title of the chapter or section.
                    666:       
                    667:     \item[start] the beginning of a page range (usually the first page
                    668:       of the chapter). The \texttt{start} element has an optional
                    669:       \texttt{increment} attribute to indicate the number of logical
                    670:       pages on a scan image.\footnote{This information is only needed by
                    671:         additional tools that try to generate lists of all page and
                    672:         image numbers.}
                    673: 
                    674:       \begin{description}
                    675:       \item[name] the ``name'' of the first page (see \texttt{page}).
                    676:         
                    677:       \item[index] the index of the first page (see \texttt{page}).
                    678:         
                    679:       \item[url] the URL of the first page (see \texttt{page}).
                    680:       \end{description}
                    681:       
                    682:     \item[end] the end of a page range (usually the last page of the
                    683:       chapter).
                    684: 
                    685:       \begin{description}
                    686:       \item[name] the ``name'' of the last page (see \texttt{page}).
                    687:         
                    688:       \item[index] the index of the last page (see \texttt{page}).
                    689:         
                    690:       \item[url] the URL of the last page (see \texttt{page}).
                    691:       \end{description}
                    692:       
                    693:     \item[page] as an alternative (or in addition) to
                    694:       \texttt{start}/\texttt{end} page ranges, single \texttt{page}
                    695:       elements can be used inside \texttt{chapter}.
1.4       casties   696:     \end{description}
                    697:   \end{description}
                    698: \end{description}
                    699: 
                    700: %%\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
1.1       casties   701: 
                    702: 
1.12    ! casties   703: \subsection{Digital images}
1.1       casties   704: \label{sec:inform-scann-imag}
                    705: 
                    706: Image files representing scanned images can have an \texttt{img}
                    707: container tag with information about the scan resolution and the size
                    708: of the original image. This information is used by the
                    709: \texttt{digilib} image viewing tool.
                    710: 
                    711: Required is one of three possible sets of tags:
                    712: 
                    713: \begin{description}
1.5       casties   714: \item[img] digital image information.
1.1       casties   715: 
1.5       casties   716:   \begin{description}
1.12    ! casties   717:   \item[original-size-x] The width of the original
        !           718:     image -- required. \\
        !           719:     The unit of measure can be contained as parameter \texttt{unit},
        !           720:     the default is meter ``m''. The width to be considered is the
        !           721:     total width of the scanned area.
1.5       casties   722:     
1.12    ! casties   723:   \item[original-size-y] The height of the original image -- required.
1.5       casties   724:     
1.12    ! casties   725:   \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
1.5       casties   726:     
1.12    ! casties   727:   \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
1.5       casties   728:   \end{description}
1.1       casties   729: \end{description}
                    730: 
                    731: or
                    732: 
                    733: \begin{description}
1.5       casties   734: \item[img] digital image information.
                    735: 
                    736:   \begin{description}
                    737:   \item[original-dpi-x] The resolution of the hi-res scan in its width
1.12    ! casties   738:     in pixels per inch -- required.
1.1       casties   739: 
1.5       casties   740:   \item[original-dpi-y] The resolution of the hi-res scan in its height
1.12    ! casties   741:     in pixels per inch -- required.
        !           742: 
        !           743:   \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
        !           744:     
        !           745:   \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
1.5       casties   746:   \end{description}
1.1       casties   747: \end{description}
                    748: 
                    749: or
                    750: 
                    751: \begin{description}
1.5       casties   752: \item[img] digital image information.
                    753: 
                    754:   \begin{description}
                    755:   \item[original-dpi] The resolution of the hi-res scan in pixels per
1.12    ! casties   756:     inch if the resolutions in width and height are the same -- required.
        !           757: 
        !           758:   \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
        !           759:     
        !           760:   \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
1.5       casties   761:   \end{description}
1.1       casties   762: \end{description}
1.7       casties   763: 
                    764: 
1.10      casties   765: 
1.12    ! casties   766: \subsection{Digital image acquisition}
1.10      casties   767: \label{sec:inform-about-image}
                    768: 
                    769: A description of the technology used in the process of producing a
                    770: digital image.
                    771: 
                    772: \begin{description}
                    773: \item[image-acquisition] description of the image production process
                    774:   \begin{description}
1.12    ! casties   775:   \item[device] acquisition device (e.g. ``flatbed scanner'') 
1.10      casties   776: 
1.12    ! casties   777:   \item[image-type] type and color-depth of the image -- required (e.g. ``RGB 24
1.10      casties   778:     bit'')
                    779: 
                    780:   \item[production-comment] additional textual information about the
                    781:     production process
                    782:   \end{description}
                    783: \end{description}
                    784: 
                    785: 
1.12    ! casties   786: 
1.7       casties   787: \subsection{Full text with images}
                    788: \label{sec:full-text-with}
                    789: 
1.12    ! casties   790: Full text in an XML format should be specified with a
        !           791: \texttt{content-type}\footnote{see section~\ref{tag-content-type}
        !           792: on page~\pageref{tag-content-type}} ``fulltext''.
1.8       casties   793: 
                    794: The relation between the full text and optional images of
                    795: whole pages or parts of pages must be specified in a
                    796: \texttt{text-tool} container.
                    797: 
                    798: \begin{description}
                    799: \item[text-tool] representation of full text with images
                    800: 
                    801:   \begin{description}
                    802:   \item[text-file] the file name of the full text file (with path
                    803:     inside document directory)
1.12    ! casties   804:     
1.8       casties   805:   \item[page-images] the directory name of the directory containing the
1.12    ! casties   806:     page image files (with path inside document directory)
1.8       casties   807: 
                    808:   \item[xslt-file] the file name of an additional XSL transformation
                    809:     file
                    810: 
                    811:   \item[text-config] container for configuration options
1.10      casties   812:     \begin{description}
                    813:     \item[container-tag] the name of the text root element (default
                    814:       ``text'')
                    815:       
                    816:     \item[ref-element-tag] the name of the element that is used as
                    817:       unit of reference when results are presented
1.8       casties   818:     
1.10      casties   819:     \item[pagebreak-tag] the name of the element that indicates page
                    820:       breaks (default ``pb'')
                    821:     \end{description}
1.8       casties   822:   \end{description}
                    823: \end{description}
1.7       casties   824: 
1.1       casties   825: 
                    826: 
1.12    ! casties   827: \subsection{Copyright and access conditions}
        !           828: \label{sec:access-conditions}
        !           829: 
        !           830: If the access to a resource is bound to conditions for technical or legal
        !           831: reasons then the conditions can be put in an \texttt{access-conditions}
        !           832: container. Other access rights conditions like copyright can also be
        !           833: documented in this container.
        !           834: 
        !           835: \begin{description}
        !           836: \item[access-conditions] legal and technical conditions for access to
        !           837:   this resource
        !           838: 
        !           839:   \begin{description}
        !           840:   \item[attribution] The name or institution this resource should be
        !           841:     attributed to when it's publicly presented
        !           842: 
        !           843:     \begin{description}
        !           844:     \item[name] a name (free text)
        !           845: 
        !           846:     \item[url] a URL (with an optional \texttt{label} attribute to show
        !           847:       as text)
        !           848:     \end{description}
        !           849: 
        !           850:   \item[copyright] the copyright owner and its conditions
        !           851:     \begin{description}
        !           852:     \item[owner] the name of the copyright owner
        !           853:       \begin{description}
        !           854:       \item[name] a name (free text)
        !           855: 
        !           856:       \item[url] a URL (with an optional \texttt{label} attribute to show
        !           857:         as text)
        !           858:       \end{description}
        !           859: 
        !           860:     \item[date] the date when the copyright was issued
        !           861: 
        !           862:     \item[duration] the duration of the copyright (if known)
        !           863: 
        !           864:     \item[description] free-text field for special or additional
        !           865:       conditions
        !           866:     \end{description}
        !           867: 
        !           868:   \item[access] conditions of access to this resource
        !           869:     \begin{description}
        !           870:     \item[internal] access should be restricted to a group of users. The
        !           871:       type of group is defined by one of the following
        !           872:       \begin{description}
        !           873:       \item[institution] the members of this institution. The method
        !           874:         to identify a user to belong to the institution is not
        !           875:         specified in this document.
        !           876: 
        !           877:       \item[subnet] all computers with an IP-address in this subnet. The
        !           878:         subnet is defined in ``truncated-quad'' (e.g. ``141.14'') or
        !           879:         ``address/netmask'' (e.g. ``141.14.0.0/255.255.0.0'') notation.
        !           880:         
        !           881:       \item[group] the members of this named group. The method to
        !           882:         identify a user to belong to a named group is not specified in
        !           883:         this document.
        !           884:       \end{description}
        !           885: 
        !           886:     \item[scientific] access to this resource should be restricted to
        !           887:       scientific work
        !           888: 
        !           889:     \item[free] access to this resource is not restricted
        !           890:       
        !           891:     \item[special] if none of the above conditions seems appropriate,
        !           892:       a free-form text can be specified here.
        !           893:     \end{description}
        !           894:   \end{description}
        !           895: \end{description}
        !           896: 
        !           897: \noindent
        !           898: It should be noted that control over the access to the resource has to
        !           899: be provided by additional technical measures. Access conditions in the
        !           900: metadata file only state that conditions \emph{should} be observed,
        !           901: not that they \emph{are} necessarily observed, as the enforcement of
        !           902: conditions depends on additional technical measures.
        !           903: 
        !           904: 
        !           905: 
        !           906: \subsection{Acquisition of raw-data}
        !           907: \label{sec:acqu-inform}
        !           908: 
        !           909: Information about the acquisition source for raw data resources can be
        !           910: provided in an \texttt{acquisition} container.
        !           911: 
        !           912: \begin{description}
        !           913: \item[acquisition] the acquisition source of this resource -- required
        !           914:   for raw data.
        !           915:   \begin{description}
        !           916:   \item[provider] where this resource came from -- required
        !           917:     \begin{description}
        !           918:     \item[name] free-text name of the provider (institution or
        !           919:       individual)
        !           920: 
        !           921:     \item[address] address of the provider
        !           922: 
        !           923:     \item[contact] contact person at the provider (i.e. name and email)
        !           924: 
        !           925:     \item[url] URL related to the provider
        !           926:     \end{description}
        !           927: 
        !           928:   \item[date] date of acquisition -- required
        !           929: 
        !           930:   \item[description] free-text description of the acquisition source or
        !           931:   additional information
        !           932:   \end{description}
        !           933: \end{description}
        !           934: 
        !           935: 
        !           936: 
        !           937: \subsection{Documentary Films}
        !           938: \label{sec:documentary-films}
        !           939: 
        !           940: Documentary films can be described using a \texttt{film-acquisition}
        !           941: container.
        !           942: 
        !           943: \begin{description}
        !           944: \item[film-acquisition] description of a (documentary) film --
        !           945:   required for documentary film
        !           946:   \begin{description}
        !           947:   \item[recording] specification of the recording process
        !           948:     \begin{description}
        !           949:     \item[author] the person or persons doing the recording
        !           950: 
        !           951:     \item[date] the date or time span when the film was recorded
        !           952: 
        !           953:     \item[location] the place where the film was recorded
        !           954: 
        !           955:     \item[device] recording device used (e.g. ``Sony CP-DV8 Camcorder'')
        !           956:       
        !           957:     \item[format] format of the recorded film -- required (e.g. ``DV
        !           958:       720x524 25fps interlaced'')
        !           959:     \end{description}
        !           960:  
        !           961:   \item[description] free-form description of the recording and the
        !           962:     content of the film
        !           963:   \end{description}
        !           964: \end{description}
        !           965: 
        !           966: (More information about the digitization step could be added in a
        !           967: \texttt{digitization} tag similar to the \texttt{recording} tag.)
        !           968: 
1.1       casties   969: 
                    970: 
                    971: 
1.4       casties   972: \section{Sample metadata files for ECHO resources}
1.1       casties   973: 
1.5       casties   974: The following is a sample metadata index file for a directory containing a
                    975: scanned document.
                    976: 
                    977: \begin{small}
1.1       casties   978: \begin{verbatim}
1.11      casties   979: <resource type="ECHO" version="1.0">
1.5       casties   980:   <description>Fleck, 1980</description>
                    981:   <name>fleck.1980</name>
                    982:   <creator>University of Bern</creator>
                    983:   <archive-path>ubern/wiss-theorie</archive-path>
                    984:   <content-type>scanned images</content-type>
                    985:   <meta>
                    986:     <dri>echo23a45e2329x</dri>
                    987:     <lang>ger</lang>
                    988:     <bib type="book">
                    989:       <author>Fleck, Ludwik</author>
                    990:       <year>1980</year>
                    991:       <title>Entstehung und Entwicklung einer 
                    992:              wissenschaftlichen Tatsache</title>
                    993:       <series-editor></series-editor>
                    994:       <series-title></series-title>
                    995:       <series-volume></series-volume>
                    996:       <number-of-pages></number-of-pages>
                    997:       <city>Frankfurt am Main</city>
                    998:       <publisher>Suhrkamp</publisher>
                    999:       <edition></edition>
                   1000:       <number-of-volumes></number-of-volumes>
                   1001:       <translator></translator>
                   1002:       <isbn-issn></isbn-issn>
                   1003:       <keywords>Wissenschaftstheorie, Fleck, Tatsache</keywords>
                   1004:       <abstract></abstract>
                   1005:     </bib>
                   1006:   </meta>
                   1007:   <dir>
                   1008:      <description>Scanned images (300dpi)</description>
                   1009:      <name>img</name>
                   1010:   </dir>
1.4       casties  1011: </resource>
                   1012: \end{verbatim}
1.5       casties  1013: \end{small}
1.4       casties  1014: 
1.5       casties  1015: The following is a sample metadata file for a single image of an
                   1016: architectural drawing.
1.4       casties  1017: 
1.5       casties  1018: \begin{small}
1.4       casties  1019: \begin{verbatim}
1.11      casties  1020: <resource type="ECHO" version="1.0">
1.5       casties  1021:   <creator>Bibliotheca Hertziana</creator>
                   1022:   <content-type>scanned images</content-type>
                   1023:   <file>
                   1024:     <name>00000271-asl-160-r-full.tif</name>
                   1025:     <meta>
                   1026:       <img>
                   1027:         <original-dpi>315</original-dpi>
                   1028:       </img>
                   1029:       <dri>echo45a67bc4367d</dri>
                   1030:       <lang>ita</lang>
                   1031:       <doc type="Architectural Drawing">
                   1032:         <person>Ciolli, Giacomo</person>
                   1033:         <person>Urban VIII; Barberini, Maffeo</person>
                   1034:         <location>Accademia di San Luca</location>
                   1035:         <location>Roma</location>
                   1036:         <date>1706</date>
                   1037:         <object>Concorso Clementino</object>
                   1038:         <object>Fontana Pubblica</object>
                   1039:         <object>Brunnen</object>
                   1040:         <object>ASL 160</object>
                   1041:         <keywords></keywords>
                   1042:       </doc>
                   1043:       <context>
                   1044:          <url>http://colosseum.biblhertz.it:8080/Lineamenta/
                   1045:          1033478408.39/1035196181.35/1035196204.09/1035394121.83
                   1046:          </url>
                   1047:       </context>
                   1048:     </meta>
                   1049:   </file>
1.2       casties  1050: </resource>
1.1       casties  1051: \end{verbatim}
1.5       casties  1052: \end{small}
1.1       casties  1053: 
                   1054: \end{document}
                   1055: 
                   1056: %%% Local Variables: 
                   1057: %%% mode: latex
                   1058: %%% TeX-master: t
                   1059: %%% End: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>