Annotation of storage/meta/meta-format.tex, revision 1.17

1.1       casties     1: \documentclass[a4paper]{article}
                      2: 
                      3: \usepackage[latin1]{inputenc}
                      4: \usepackage[T1]{fontenc}
                      5: \usepackage{ae}
                      6: %\usepackage{times}
                      7: %\usepackage{courier}
                      8: 
                      9: % create in-text links black (with PDF)
1.6       casties    10: \usepackage[colorlinks=true,linkcolor=black]{hyperref}
1.1       casties    11: % Format URLs nicely (without PDF)
1.6       casties    12: %\usepackage{url}
1.1       casties    13: 
                     14: 
                     15: \title{A simple metadata format for resource bundles}
                     16: 
1.4       casties    17: \author{Robert Casties, Dirk Wintergrün, Hans-Christoph Liess}
1.1       casties    18: 
1.17    ! casties    19: \date{V1.3.1 of 27.6.2006}
1.1       casties    20: 
                     21: \begin{document}
                     22: 
                     23: \maketitle
                     24: 
                     25: \tableofcontents
                     26: 
                     27: 
                     28: \section{File and directory names}
                     29: \label{sec:file-directory-names}
                     30: 
                     31: File and directory names should not contain spaces. Allowed characters
                     32: in filenames are only the alphanumeric set a-z, A-Z, 0-9, hyphen
                     33: ``-'', underscore ``\_'' and dot ``.''.
                     34: 
1.12      casties    35: Files and directories with names that contain illegal characters must
                     36: be transformed to allowed names. A proposition for a simple
                     37: transformation rule is
                     38: 
                     39: \begin{itemize}
                     40: \item whitespace characters (e.g. blank, tab, cr, lf) are replaced by
                     41:   hyphens ``-''
                     42: 
                     43: \item other illegal characters are replaced by underscores ``\_''.
                     44: \end{itemize}
                     45: 
                     46: This rule does not provide a reversible mapping to the original
                     47: illegal file name and it does not provide a collision-free mapping,
                     48: i.e. two different illegal file names might be mapped to the same
                     49: allowed file name. Additional precautions for these cases must be
                     50: taken.
1.1       casties    51: 
1.4       casties    52: 
                     53: \section{Metadata files}
                     54: \label{sec:metadata-files}
                     55: 
                     56: The metadata information is stored in the XML format documented below
                     57: in special files in the resource directory. Two forms of metadata
                     58: files are possible:
                     59: \begin{itemize}
                     60: \item a file named \texttt{index.meta} in a directory.
                     61: 
1.16      casties    62: \item a file with the same name as the data file it describes and an
1.4       casties    63:   additional extension \texttt{.meta}. For example metadata for the
1.16      casties    64:   file \texttt{p0001.tif} would be in a file \texttt{p0001.tif.meta}.
1.4       casties    65: \end{itemize}
                     66: 
                     67: The resource directory must contain an \texttt{index.meta} file with
1.16      casties    68: information about the resource as a whole. Subdirectories can
                     69: contain additional \texttt{index.meta} files.
1.4       casties    70: 
                     71: Additional information about single data files that are part of the
                     72: resource can either be put in \texttt{file} tags in the
                     73: \texttt{index.meta} file or in separate \emph{filename}\texttt{.meta}
                     74: files for each data file. Information from the directory level file is
1.16      casties    75: inherited at the file level when it is not overwritten.
1.4       casties    76: 
                     77: 
1.1       casties    78: \section{Resource format}
                     79: \label{sec:mpiwg-doc}
                     80: 
                     81: In this description elements marked ``optional'' need not be supplied
                     82: by the provider of the resource and may be absent in all versions of
                     83: the metadata file. Elements marked ``required'' must be supplied by
                     84: the provider of the resource. Elements marked ``deduced'' can be
                     85: supplied by the provider of the resource but can also be provided by
1.4       casties    86: automatic scripts later in the process; these elements must be present
1.1       casties    87: in the final file.
                     88: 
1.12      casties    89: File and directory paths in the metadata file use the conventional
                     90: Unix file separator slash ``/''.
                     91: 
1.11      casties    92: The outer container element is \texttt{resource}. It has the following
                     93: \textbf{attributes}:
                     94: 
                     95: \begin{description}
1.12      casties    96: \item[type] sub-type of resource (e.g. ``ECHO'', ``MPIWG'') --
                     97:   optional.
1.11      casties    98:   
1.16      casties    99: \item[version] version number of metadata format (currently 1.2) --
1.11      casties   100:   required.
                    101: \end{description}
                    102: 
                    103: \noindent The allowed \textbf{elements} inside \texttt{resource} are:
1.1       casties   104: 
                    105: \begin{description}
1.14      casties   106: \item[description] An informal textual description of the resource --
                    107:   optional\footnote{At least one description of the resource's content
                    108:     is required. The description can be an informal
                    109:     \texttt{description} element or a descriptive element (like
                    110:     \texttt{bib}) in a \texttt{meta} container.}.
1.1       casties   111: 
                    112: \item[name] The filename of the resource (name of the directory this
                    113:   file is contained in) -- required.
                    114: 
                    115: \item[creator] The name of the project or person that created the
                    116:   resource -- optional.
1.4       casties   117:   
                    118: \item[archive-creation-date] The time and date the archive collection
                    119:   was created -- deduced.
1.1       casties   120: 
1.4       casties   121: \item[archive-storage-date] The time and date the archive was written
                    122:   to permanent storage -- deduced (must not be set by the user).
1.1       casties   123: 
                    124: \item[archive-path] The full path to the resource directory inside the
1.5       casties   125:   whole archive collection, including the resource directory -- deduced.
1.12      casties   126: 
                    127: \item[archive-id] The ID for this document in the archive --
1.16      casties   128:   optional.
1.1       casties   129:   
                    130: \item[derived-from] Container for the description of the original
                    131:   resource if this resource is a modified version of another resource
                    132:   -- optional.
                    133: 
                    134:   \begin{description}
1.12      casties   135:   \item[archive-id] The ID of the original resource
1.16      casties   136:     -- required (or archive-path).
1.12      casties   137: 
1.1       casties   138:   \item[archive-path] The full path to the original resource
1.16      casties   139:     -- required (or archive-id).
                    140: 
                    141:   \item[description] An informal textual description of the relation
                    142:   of this resource to the original resource -- optional.
                    143:   \end{description}
                    144:   
                    145: \item[used-by] Container for the description of modified resources
                    146:   if this resource is the source of another resource
                    147:   -- optional.
                    148: 
                    149:   \begin{description}
                    150:   \item[archive-id] The ID of the derived resource
                    151:     -- required (or archive-path).
                    152: 
                    153:   \item[archive-path] The full path to the derived resource
                    154:     -- required (or archive-id).
1.1       casties   155: 
                    156:   \item[description] An informal textual description of the relation
                    157:   of this resource to the derived resource -- optional.
                    158:   \end{description}
                    159:   
                    160: \item[linked-with] Container for the description of another
                    161:   resource when this resource is a linked copy of another resource
                    162:   -- optional.
                    163: 
                    164:   \begin{description}
1.12      casties   165:   \item[archive-id] The ID of the linked resource
1.16      casties   166:     -- required (or archive-path).
1.12      casties   167: 
1.1       casties   168:   \item[archive-path] The full path to the linked resource
1.16      casties   169:     -- required (or archive-id).
1.1       casties   170: 
                    171:   \item[description] An informal textual description of the relation
                    172:   of this resource to the linked resource -- optional.
                    173:   \end{description}
                    174:   
1.12      casties   175: \item[media-type] \label{tag-media-type} The main media type of this
                    176:   resource -- required.\\ The main media type can be overridden by
                    177:   \texttt{media-type}s in subdirectories. Possible types are
                    178:   \begin{itemize}
                    179:   \item \texttt{image}
                    180: 
                    181:   \item \texttt{text}
                    182: 
                    183:   \item \texttt{audio}
                    184: 
                    185:   \item \texttt{video}
                    186: 
                    187:   \item \texttt{data} for other types of data
                    188:   \end{itemize}
1.1       casties   189:   
                    190: \item[meta] Additional metadata information about the resource --
                    191:   optional.\\ For a description of additional metadata see below.
                    192: 
                    193: \item[dir] Container for the description of a subdirectory -- required
                    194:   (when there are subdirectories).\\ \texttt{dir} tags should not be
                    195:   nested. Directories at lower levels are identified by their
                    196:   \texttt{path}.
                    197: 
                    198:   \begin{description}
                    199:   \item[description] An informal textual description of the
                    200:     subdirectory -- optional.
                    201: 
                    202:   \item[name] The name of the subdirectory -- required.
                    203:     
1.12      casties   204:   \item[original-name] A text string associated with the directory as
                    205:     original name -- optional. (E.g. if the data in this directory
                    206:     came from an external source and had a name that had to be changed
                    207:     according to section~\ref{sec:file-directory-names} but it should
                    208:     be possible to reference the original name.)
                    209:     
1.1       casties   210:   \item[path] The directory path of this subdirectory relative to the
1.5       casties   211:     resource's root directory (excluding the directory itself) --
                    212:     required (may be empty or omitted if the directory is a direct
                    213:     child of the resource's root directory).
1.1       casties   214:     
                    215:   \item[meta] Additional metadata information about the directory --
                    216:     optional.\\ For a description of additional metadata see below.
                    217:   \end{description}
                    218:   
                    219: \item[file] Container for the description of a file -- deduced.\\
                    220:   \texttt{file} tags should not be nested in \texttt{dir} tags. Files
                    221:   at lower directory levels are identified by their \texttt{path}.
                    222: 
                    223:   \begin{description}
                    224:   \item[description] An informal textual description of the
                    225:     file -- optional.
                    226: 
                    227:   \item[name] The name of the file -- required.
                    228:     
1.12      casties   229:   \item[original-name] A text string associated with the file as
1.16      casties   230:     original name -- optional. (e.g. if this file came from an
1.12      casties   231:     external source and had a name that had to be changed according to
1.16      casties   232:     section~\ref{sec:file-directory-names} it is possible
                    233:     to preserve the original name.)
1.12      casties   234:     
1.1       casties   235:   \item[path] The directory path of this file relative to the
1.5       casties   236:     resource's root directory (excluding the file itself) -- required
                    237:     (may be empty or omitted if the file is in the resource's root
                    238:     directory).
1.7       casties   239:     
                    240:   \item[date] The file's modification or creation date\footnote{The
                    241:       preferred time and date format is ``YYYY/MM/DD HH:MM:SS''},
                    242:     whichever is more recent -- optional.
1.1       casties   243: 
                    244:   \item[modification-date] The file's modification date -- optional.
                    245: 
                    246:   \item[creation-date] The file's creation date -- optional.
1.7       casties   247:     
1.1       casties   248:   \item[size] The file size -- deduced.
                    249:     
                    250:   \item[mime-type] The file's mime-type -- optional.
                    251: 
                    252:   \item[md5cs] MD5 checksum of the file content -- optional.
                    253:     
                    254:   \item[meta] Additional metadata information about the file --
                    255:     optional. For a description of additional metadata see below.
                    256:   \end{description}
                    257:   
                    258: \end{description}
                    259: 
                    260: 
                    261: 
                    262: \section{Additional metadata}
                    263: \label{sec:additional-metadata}
                    264: 
                    265: All elements with \texttt{meta} tags can contain an arbitrary number
1.12      casties   266: of the following additional metadata elements.
                    267: 
1.16      casties   268: \subsection{Workflow state}
1.12      casties   269: \label{sec:workflow-state}
                    270: 
                    271: All additional metadata elements can have a \texttt{workflow-state}
                    272: \textbf{attribute}. This attribute reflects the state of the
                    273: corresponding metadata element. The possible values for the
                    274: \texttt{workflow-state} attribute are
                    275: \begin{itemize}
                    276: \item \texttt{preliminary} this information is preliminary. It must
                    277:   be checked in further workflow steps.
                    278: 
                    279: \item \texttt{inwork}
                    280: 
                    281: \item \texttt{final}
                    282: \end{itemize}
                    283: 
                    284: Workflow states other than \texttt{preliminary} are part of the
                    285: workflow handling of the respective projects.
                    286: 
                    287: Metadata elements can appear multiple times with different
                    288: \texttt{workflow-state} attributes. This enables metadata versioning.
                    289: 
                    290: 
                    291: 
                    292: \subsection{Content type}
                    293: \label{sec:content-type}
                    294: 
                    295: \begin{description}
                    296: \item[content-type] \label{tag-content-type} The content type of this
                    297:   resource -- required.\\
                    298:   The content type enables the choice of tools to manipulate and
                    299:   display the resource. There should be a common list of content
                    300:   types. For digital documents (books, manuscripts) this would be
                    301:   ``scanned document'', for other image data ``scanned
                    302:   images''.\footnote{The criterion for documents is an ordered
                    303:     succession of image files (pages) and equal image size and
                    304:     resolution throughout the images of a resource.}
                    305: \end{description}  
                    306: 
                    307: 
1.1       casties   308: 
1.4       casties   309: \subsection{Language}
                    310: \label{sec:lang}
                    311: 
                    312: The language of a resource (e.g. a text) can be specified with a
                    313: \texttt{lang} tag. Languages have to be described using the
                    314: international codes for the representation of names of languages
                    315: either in two-letter form (ISO 639-1) or in three-letter form (ISO
                    316: 639-2).  The entire catalogue of languages is documented on the page
                    317: 
                    318: \url{http://www.loc.gov/standards/iso639-2/englangn.html}
                    319: 
1.1       casties   320: 
                    321: \subsection{DRI}
                    322: \label{sec:dri}
                    323: 
                    324: The \emph{digital resource identifier} for the resource is specified
1.4       casties   325: in a \texttt{dri} element. Digital resource identifiers are documented
1.1       casties   326: on the page
                    327: 
                    328: \url{http://pythia.mpiwg-berlin.mpg.de/projects/standards/dri}.
                    329: 
                    330: 
1.4       casties   331: 
                    332: \subsection{Collection context}
                    333: \label{sec:collection-context}
                    334: 
1.15      casties   335: The context of a resource as part of a collection or part of a project
                    336: can be specified in the \texttt{context} element. The context element
                    337: can appear multiple times if the resource is part of multiple
                    338: collections or projects.
1.4       casties   339: 
                    340: \begin{description}
1.5       casties   341: \item[context] information on collection or project context.
1.4       casties   342: 
1.5       casties   343:   \begin{description}
1.15      casties   344:   \item[link] URL to additional context information -- optional.
1.5       casties   345:     
1.15      casties   346:   \item[name] Textual description of project or collection -- optional.
                    347: 
                    348:   \item[meta-datalink] description of external sources of canonical meta
                    349:     information -- optional
                    350:     \begin{description}
                    351:     \item[db] \textbf{attribute} to identify different sets of meta data
                    352:       links to the same resource -- optional
                    353: 
                    354:     \item[object] \textbf{attribute} to identify different objects or
                    355:       parts of the same resource -- optional
                    356: 
                    357:     \item[label] textual label for the link -- optional
                    358: 
                    359:     \item[url] URL to present to the client -- optional
                    360: 
                    361:     \item[metadata-url] URL to an external server to be queried -- optional
                    362:     \end{description}
                    363: 
                    364:   \item[meta-baselink] description of external server for canonical meta
                    365:     information -- optional
                    366:     \begin{description}
                    367:     \item[db] \textbf{attribute} to identify different sets of meta data
                    368:       links to the same resource -- optional
                    369: 
                    370:     \item[label] textual label for the link -- optional
                    371: 
                    372:     \item[url] URL to present to the client -- optional
                    373:       
                    374:     \item[metadata-url] URL to an external server to be queried --
                    375:       required (the parameter \texttt{object=} with an object id has
                    376:       to be appended to this URL)
                    377:     \end{description}
1.5       casties   378:   \end{description}
1.4       casties   379: \end{description}
1.5       casties   380: 
1.4       casties   381: 
                    382: 
                    383: 
1.1       casties   384: \subsection{Bibliographic information}
                    385: \label{sec:bibliographic-data}
                    386: 
1.5       casties   387: Bibliographic information is presented in a \texttt{bib} container with
1.1       casties   388: a \texttt{type} parameter, giving the type of bibliographic resource.
1.4       casties   389: The \texttt{type} field can be repeated as a tag in the container.
                    390: 
1.5       casties   391: The format is based on the ECHO scheme for bibliographic data (cf.
                    392: content workflow), the MPIWG ``Projektbibliografie'' and the format of
                    393: the commonly used program ``EndNote''.
                    394: 
1.4       casties   395: 
                    396: \subsubsection{Book}
                    397: 
                    398: \begin{description}
                    399: 
                    400: \item [bib type="book"] a published book.
                    401: 
                    402:   \begin{description}
                    403:   \item [author] The author of the book.
                    404:   \item [year] The year of publication.
                    405:   \item [title] Title of the book.
                    406:   \item [series-editor] Name of the series editor, if the book appears
                    407:     in a series.
                    408:   \item [series-title] Title of the series, if the book appears in a
                    409:     series.
                    410:   \item [series-volume] Volume number, if the book appears in a
                    411:     series.
                    412:   \item [number-of-pages] Number of pages of the entire book.
                    413:   \item [city] City where the book was published.
                    414:   \item [publisher] Name of the publishing company
                    415:   \item [edition] Edition of the book (e.g. third edition)
                    416:   \item [number-of-volumes] Number of volumes, if the book is
                    417:     published in multiple volumes.
                    418:   \item [translator] Name of the translator.
                    419:   \item [isbn-issn]
                    420:   \end{description}
                    421: \end{description}
                    422: 
                    423: \subsubsection{In Book}
                    424: 
                    425: \begin{description}
                    426: \item [bib type="inbook"] an article as part of a book.
                    427: 
                    428:   \begin{description}
                    429:   \item [author] The author of the article.
                    430:   \item [year] The year of publication.
                    431:   \item [title] Title of the article.
                    432:   \item [editor] Name of the book's editor.
                    433:   \item [book-title] Title of the book.
                    434:   \item [series-volume] Volume number, if the book appears in a
                    435:     series.
                    436:   \item [pages] Number of pages of the article.
                    437:   \item [city] City where the book was published.
                    438:   \item [publisher] Name of the publishing company
                    439:   \item [edition] Edition of the book (e. g. third edition)
                    440:   \item [series-author] Name of the series editor, if the book appears
                    441:     in a series.
                    442:   \item [series-title] Title of the series, if the book appears in a
                    443:     series.
                    444:   \item [number-of-volumes] Number of volumes, if the book is
                    445:     published in multiple volumes.
                    446:   \item [translator] Name of the translator
                    447:   \item [isbn-issn]
                    448:   \end{description}
                    449: \end{description}
                    450: 
                    451: \subsubsection{Proceedings}
                    452: 
                    453: \begin{description}
                    454: \item [bib type="proceedings"] a conference proceedings publication.
                    455: 
                    456:   \begin{description}
                    457:   \item [author] The author of the article.
                    458:   \item [year] The year of publication.
                    459:   \item [title] Title of the article.
                    460:   \item [editor] Name of the book's editor.
                    461:   \item [conference-name] Name of the conference the proceedings are
                    462:     related to.
                    463:   \item [volume] Volume number.
                    464:   \item [pages] Number of pages of the article.
                    465:   \item [date] Date of the conference the proceedings are related to.
                    466:   \item [conference-location] City where the conference was held.
                    467:   \item [publisher] Name of the publishing company
                    468:   \item [edition] Edition of the book (e. g. third edition)
                    469:   \item [series-editor] Name of the series editor, if the book appears
                    470:     in a series.
                    471:   \item [series-title] Title of the series, if the book appears in a
                    472:     series.
                    473:   \item [number-of-volumes] Number of volumes, if the book is
                    474:     published as multiple volumes.
                    475:   \item [isbn-issn]
                    476:   \end{description}
                    477: \end{description}
                    478: 
                    479: \subsubsection{Edited Book}
                    480: 
                    481: \begin{description}
                    482: \item[bib type="edited-book"] a book that is the edition of another
                    483:   work.
                    484: 
                    485:   \begin{description}
                    486:   \item [editor] Name of the editor of the book.
                    487:   \item [year] The year of publication.
                    488:   \item [title] Title of the book.
                    489:   \item [series-editor] Name of the editor of the series the book is
                    490:     part of.
                    491:   \item [series-title] Title of the series, if the book is part of a
                    492:     series.
                    493:   \item [series-volume] Volume number, if the book appears in a series.
                    494:   \item [number-of-pages] Number of pages of the book.
                    495:   \item [city] City where the book was published.
                    496:   \item [publisher] Name of the publishing company
                    497:   \item [edition] Information about the edition (e.g. ``Repr. of the London ed. 1652'')
                    498:   \item [number-of-volumes] Number of volumes, if the book is
                    499:     published as multiple volumes.
                    500:   \item [isbn-issn]
                    501:   \end{description}
                    502: \end{description}
                    503: 
1.17    ! casties   504: \subsubsection{Journal Volume}
        !           505: 
        !           506: \begin{description}
        !           507: \item [bib type="journal-volume"] a volume of a scientific journal.
        !           508:   \begin{description}
        !           509:   \item [title] Name of the journal.
        !           510:   \item [editor] The editor of the journal.
        !           511:   \item [publisher] Name of the publishing company.
        !           512:   \item [city] City where the journal is published.
        !           513:   \item [year] The year of publication.
        !           514:   \item [volume] Volume number.
        !           515:   \item [number-of-pages] Number of pages of the volume.
        !           516:   \item [isbn-issn]
        !           517:   \end{description}
        !           518: \end{description}
        !           519: 
1.4       casties   520: \subsubsection{Journal Article}
                    521: 
                    522: \begin{description}
                    523: \item [bib type="journal-article"] an article in a scientific journal.
                    524:   \begin{description}
                    525:   \item [author] The author of the article.
                    526:   \item [year] The year of publication.
                    527:   \item [title] Title of the article.
                    528:   \item [journal] Name of the journal.
                    529:   \item [volume] Volume number, if the journal appears in a series.
                    530:   \item [issue] Number of the issue the article is part of.
                    531:   \item [pages] Number of pages of the article.
                    532:   \item [alternate-journal] Alternate Journal
                    533:   \item [isbn-issn]
                    534:   \end{description}
                    535: \end{description}
                    536: 
                    537: \subsubsection{Magazine Article}
                    538: 
                    539: \begin{description}
                    540: \item [bib type="magazine-article"] an article in a popular magazine.
                    541:   \begin{description}
                    542:   \item [author] The author of the article.
                    543:   \item [year] The year of publication.
                    544:   \item [title] Title of the article.
                    545:   \item [magazine] Name of the magazine.
                    546:   \item [volume] Volume number, if the magazine appears in a series.
                    547:   \item [issue-number] Number of the issue the article is part of.
                    548:   \item [pages] Number of pages of the article.
                    549:   \item [date] Date when the article appeared.
                    550:   \end{description}
                    551: \end{description}
                    552: 
                    553: \subsubsection{Newspaper Article}
                    554: 
                    555: \begin{description}
                    556: \item [bib type="newspaper-article"] an article in a newspaper.
                    557:   \begin{description}
                    558:   \item [author] The author of the article.
                    559:   \item [year] The year of publication.
                    560:   \item [title] Title of the article.
                    561:   \item [Newspaper] Name of the newspaper the article appeared in.
                    562:   \item [pages] Number of pages of the article.
                    563:   \item [issue-date] Date of the issue the article is part of.
                    564:   \item [city] City of the newspaper.
                    565:   \end{description}
                    566: \end{description}
                    567: 
                    568: \subsubsection{Thesis}
                    569: 
                    570: \begin{description}
                    571: \item [bib type="thesis"] a master/doctorate/etc. thesis.
                    572:   \begin{description}
                    573:   \item [author] The author of the thesis.
                    574:   \item [year] The year of publication.
                    575:   \item [title] Title of the thesis.
                    576:   \item [academic-department] Name of the academic department where
                    577:     the thesis was handed in.
                    578:   \item [number-of-pages] Number of pages of the thesis.
                    579:   \item [city] City where the thesis was published.
                    580:   \item [University] Name of the university where the thesis was
                    581:     handed in.
                    582:   \item [isbn-issn]
                    583:   \end{description}
                    584: \end{description}
                    585: 
                    586: \subsubsection{Report}
                    587: 
                    588: \begin{description}
                    589: \item [bib type="report"] a scientific report.
                    590:   \begin{description}
                    591:   \item [author] The author of the report.
                    592:   \item [year] The year of publication.
                    593:   \item [title] Title of the report.
                    594:   \item [pages] Number of pages of the report.
                    595:   \item [date] Date when the report appeared.
                    596:   \item [city] City where the report was published.
                    597:   \item [institution] Institution where the report was produced.
                    598:   \item [type] Type of report.
                    599:   \item [report-number] Report number.
                    600:   \end{description}
                    601: \end{description}
                    602: 
1.5       casties   603: \subsubsection{Manuscript}
                    604: 
                    605: \begin{description}
                    606: \item [bib type="manuscript"] a handwritten/typewritten manuscript.
                    607: 
                    608:   \begin{description}
                    609:   \item [title] Title of the manuscript.
                    610:   \item [author] The author of the text.
                    611:   \item [location] Name of the library where the manuscript is
                    612:     currently located.
                    613:   \item [year] The year or century of publication.
                    614:   \item [pages] Number of pages of the manuscript.
                    615:   \item [signature] Signature of the manuscript.
                    616:   \item [editorial-remarks] Remarks related to the online
                    617:     publication of the manuscript. This could be notes about
                    618:     annotations etc.
                    619:   \item [description] This can be any kind of description.
                    620:   \item [keywords] Keywords related to the manuscript.
                    621:   \end{description}
                    622: \end{description}
                    623: 
                    624: 
1.4       casties   625: \subsubsection{Generic}
                    626: 
                    627: \begin{description}
                    628: \item [bib type="generic"] a generic bibliographic type. This type
                    629:   should only be used in rare cases.
                    630:   \begin{description}
                    631:   \item [author]
                    632:   \item [year]
                    633:   \item [title]
                    634:   \item [secondary-author]
                    635:   \item [secondary-title]
                    636:   \item [volume]
                    637:   \item [number]
                    638:   \item [pages]
                    639:   \item [date]
                    640:   \item [place-published]
                    641:   \item [publisher]
                    642:   \item [edition]
                    643:   \item [tertiary-author]
                    644:   \item [tertiary-title]
                    645:   \item [number-of-volumes]
                    646:   \item [type-of-work]
                    647:   \item [subsidiary-author]
                    648:   \item [alternate-title]
                    649:   \item [isbn-issn]
                    650:   \item [call-number]
                    651:   \item [label]
                    652:   \item [keywords]
                    653:   \item [abstract]
                    654:   \item [notes]
                    655:   \item [url]
1.5       casties   656:   \end{description}
1.4       casties   657: \end{description}
                    658: 
                    659: 
                    660: \subsection{Architectural drawings}
                    661: \label{sec:doc}
                    662: 
                    663: Specific information for architectural drawings is presented in a
1.5       casties   664: \texttt{doc} container with an additional \texttt{type} attribute
                    665: giving the type of drawing. All elements inside the container can
                    666: appear multiple times.
1.4       casties   667: 
                    668: \begin{description}
1.5       casties   669: 
                    670: \item[doc type="Architectural Drawing"] architectural drawing.
                    671: 
                    672:   \begin{description}
                    673:   \item [person] last name and first name of a person, separated by a
                    674:     comma. A further common name for the person can be put in front,
                    675:     separated by a semicolon.
                    676:   \item [location] Name of a place in its common notation. This can be
                    677:     a city or an institution.
                    678:   \item [date] This can be a year (or several years, separated by
                    679:     commas) or a period (1706-1714). Years are noted with four digits.
                    680:   \item [object] Short description of an object or signatures.
                    681:   \item [keywords] Keywords related to the object.
                    682: \end{description}
1.4       casties   683: \end{description}
1.1       casties   684: 
                    685: 
1.10      casties   686: \subsection{Document structure (table of contents)}
1.1       casties   687: \label{sec:toc}
                    688: 
1.4       casties   689: Information on the structure of a document like the division into
                    690: parts and chapters in the way of a table of contents is presented in a
                    691: \texttt{toc} container. 
                    692: 
                    693: The scheme allows multiple logical pages on a single page image
                    694: as it is often the case with scanned books or manuscripts. The scheme
                    695: also allows for ``loose'' numbering schemes with roman, arabic or
                    696: other page numbers consecutively or mixed and changes in the numbering
                    697: within the document.
                    698: 
                    699: The flexibility comes from the fact that no additional assumptions
                    700: about the mapping between logical pages and page images are made in
                    701: the format. All mapping information is specified by the user.
                    702: 
                    703: The logical page numbering or naming that can be presented to the user
                    704: is specified in the \texttt{name} tags while the physical numbering of
                    705: the page images is specified in the \texttt{index} or \texttt{url}
                    706: tags.
1.1       casties   707: 
1.4       casties   708: \begin{description}
1.5       casties   709: \item[toc] container for document structure
                    710: 
1.4       casties   711:   \begin{description}
1.5       casties   712:   \item[page] describes a single logical page
                    713: 
                    714:     \begin{description}
                    715:     \item[name] the ``name'' of the logical page. This can be any string
                    716:       like a page number (arabic, roman, etc.) or a special designation
                    717:       like ``Table 5''.
                    718:       
                    719:     \item[index] the \texttt{digilib} index number\footnote{The index
                    720:         number for digilib is the index in the alphabetical order of the
                    721:         scan file names.} of the scan image of the page.
                    722:       
                    723:     \item[url] alternatively to the \texttt{digilib} index number the
                    724:       full URL of the scan image of the page can be used.
                    725:     \end{description}
1.4       casties   726:     
1.5       casties   727:   \item[chapter] describes a section or chapter of the text.
                    728:     \texttt{chapter} elements can be nested.
1.1       casties   729: 
1.4       casties   730:     \begin{description}
1.5       casties   731:     \item[name] the title of the chapter or section.
                    732:       
                    733:     \item[start] the beginning of a page range (usually the first page
                    734:       of the chapter). The \texttt{start} element has an optional
                    735:       \texttt{increment} attribute to indicate the number of logical
                    736:       pages on a scan image.\footnote{This information is only needed by
                    737:         additional tools that try to generate lists of all page and
                    738:         image numbers.}
                    739: 
                    740:       \begin{description}
                    741:       \item[name] the ``name'' of the first page (see \texttt{page}).
                    742:         
                    743:       \item[index] the index of the first page (see \texttt{page}).
                    744:         
                    745:       \item[url] the URL of the first page (see \texttt{page}).
                    746:       \end{description}
                    747:       
                    748:     \item[end] the end of a page range (usually the last page of the
                    749:       chapter).
                    750: 
                    751:       \begin{description}
                    752:       \item[name] the ``name'' of the last page (see \texttt{page}).
                    753:         
                    754:       \item[index] the index of the last page (see \texttt{page}).
                    755:         
                    756:       \item[url] the URL of the last page (see \texttt{page}).
                    757:       \end{description}
                    758:       
                    759:     \item[page] alternative (and additional) to
                    760:       \texttt{start}/\texttt{end} page ranges single \texttt{page}
                    761:       elements can be used inside \texttt{chapter}.
1.4       casties   762:     \end{description}
                    763:   \end{description}
                    764: \end{description}
                    765: 
                    766: %%\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
1.1       casties   767: 
                    768: 
1.12      casties   769: \subsection{Digital images}
1.1       casties   770: \label{sec:inform-scann-imag}
                    771: 
                    772: Image files representing scanned images can have an \texttt{img}
                    773: container tag with information about the scan resolution and the size
                    774: of the original image. This information is used by the
                    775: \texttt{digilib} image viewing tool.
                    776: 
                    777: Required is one of three possible sets of tags:
                    778: 
                    779: \begin{description}
1.5       casties   780: \item[img] digital image information.
1.1       casties   781: 
1.5       casties   782:   \begin{description}
1.12      casties   783:   \item[original-size-x] The width of the original
                    784:     image -- required. \\
                    785:     The unit of measure can be contained as parameter \texttt{unit},
                    786:     the default is meter ``m''. The width to be considered is the
                    787:     total width of the scanned area.
1.5       casties   788:     
1.12      casties   789:   \item[original-size-y] The height of the original image -- required.
1.5       casties   790:     
1.12      casties   791:   \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
1.5       casties   792:     
1.12      casties   793:   \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
1.5       casties   794:   \end{description}
1.1       casties   795: \end{description}
                    796: 
                    797: or
                    798: 
                    799: \begin{description}
1.5       casties   800: \item[img] digital image information.
                    801: 
                    802:   \begin{description}
                    803:   \item[original-dpi-x] The resolution of the hi-res scan in its width
1.12      casties   804:     in pixels per inch -- required.
1.1       casties   805: 
1.5       casties   806:   \item[original-dpi-y] The resolution of the hi-res scan in its height
1.12      casties   807:     in pixels per inch -- required.
                    808: 
                    809:   \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
                    810:     
                    811:   \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
1.5       casties   812:   \end{description}
1.1       casties   813: \end{description}
                    814: 
                    815: or
                    816: 
                    817: \begin{description}
1.5       casties   818: \item[img] digital image information.
                    819: 
                    820:   \begin{description}
                    821:   \item[original-dpi] The resolution of the hi-res scan in pixels per
1.12      casties   822:     inch if the resolutions in width and height are the same -- required.
                    823: 
                    824:   \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
                    825:     
                    826:   \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
1.5       casties   827:   \end{description}
1.1       casties   828: \end{description}
1.7       casties   829: 
                    830: 
1.10      casties   831: 
1.12      casties   832: \subsection{Digital image acquisition}
1.10      casties   833: \label{sec:inform-about-image}
                    834: 
                    835: A description of the technology used in the process of producing a
                    836: digital image.
                    837: 
                    838: \begin{description}
                    839: \item[image-acquisition] description of the image production process
                    840:   \begin{description}
1.12      casties   841:   \item[device] acquisition device (e.g. ``flatbed scanner'') 
1.10      casties   842: 
1.12      casties   843:   \item[image-type] type and color-depth of the image -- required (e.g. ``RGB 24
1.10      casties   844:     bit'')
                    845: 
                    846:   \item[production-comment] additional textual information about the
                    847:     production process
                    848:   \end{description}
                    849: \end{description}
                    850: 
                    851: 
1.12      casties   852: 
1.7       casties   853: \subsection{Full text with images}
                    854: \label{sec:full-text-with}
                    855: 
1.12      casties   856: Full text in an XML format should be specified with a
                    857: \texttt{content-type}\footnote{see section~\ref{tag-content-type}
                    858: on page~\pageref{tag-content-type}} ``fulltext''.
1.8       casties   859: 
                    860: The relation between the full text and optional images of
                    861: whole pages or parts of pages must be specified in a
                    862: \texttt{text-tool} container.
                    863: 
                    864: \begin{description}
                    865: \item[text-tool] representation of full text with images
                    866: 
                    867:   \begin{description}
                    868:   \item[text-file] the file name of the full text file (with path
                    869:     inside document directory)
1.12      casties   870:     
1.8       casties   871:   \item[page-images] the directory name of the directory containing the
1.12      casties   872:     page image files (with path inside document directory)
1.8       casties   873: 
                    874:   \item[xslt-file] the file name of an additional XSL transformation
                    875:     file
                    876: 
                    877:   \item[text-config] container for configuration options
1.10      casties   878:     \begin{description}
                    879:     \item[container-tag] the name of the text root element (default
                    880:       ``text'')
                    881:       
                    882:     \item[ref-element-tag] the name of the element that is used as
                    883:       unit of reference when results are presented
1.8       casties   884:     
1.10      casties   885:     \item[pagebreak-tag] the name of the element that indicates page
                    886:       breaks (default ``pb'')
                    887:     \end{description}
1.8       casties   888:   \end{description}
                    889: \end{description}
1.7       casties   890: 
1.1       casties   891: 
                    892: 
1.12      casties   893: \subsection{Copyright and access conditions}
                    894: \label{sec:access-conditions}
                    895: 
                    896: If the access to a resource is bound to conditions for technical or legal
                    897: reasons then the conditions can be put in an \texttt{access-conditions}
1.16      casties   898: container. Other usage conditions like copyright can also be
1.12      casties   899: documented in this container.
                    900: 
                    901: \begin{description}
                    902: \item[access-conditions] legal and technical conditions for access to
                    903:   this resource
                    904: 
                    905:   \begin{description}
                    906:   \item[attribution] The name or institution this resource should be
                    907:     attributed to when it's publicly presented
                    908: 
                    909:     \begin{description}
                    910:     \item[name] a name (free text)
                    911: 
                    912:     \item[url] a URL (with an optional \texttt{label} attribute to show
                    913:       as text)
                    914:     \end{description}
                    915: 
1.16      casties   916:   \item[copyright] the copyright holder and its conditions
1.12      casties   917:     \begin{description}
1.16      casties   918:     \item[owner] the name of the copyright holder
1.12      casties   919:       \begin{description}
                    920:       \item[name] a name (free text)
                    921: 
                    922:       \item[url] a URL (with an optional \texttt{label} attribute to show
                    923:         as text)
                    924:       \end{description}
                    925: 
                    926:     \item[date] the date when the copyright was issued
                    927: 
1.16      casties   928:     \item[duration] the duration of the copyright term (if known)
1.12      casties   929: 
                    930:     \item[description] free-text field for special or additional
                    931:       conditions
                    932:     \end{description}
1.14      casties   933: 
                    934: 
                    935:   \item[publish-metadata] metadata about this resource can be made
1.16      casties   936:     freely available when this tag is present (otherwise metadata has
                    937:     the same access conditions as the rest of the resource). Access to
                    938:     the resource itself is regulated separately by the \texttt{access}
                    939:     element.
1.12      casties   940: 
1.16      casties   941:   \item[access] conditions of access to this resource. Different
                    942:     access types are specified by a \texttt{type} attribute:
1.12      casties   943:     \begin{description}
1.16      casties   944:     \item[type=group] access restricted to the members of this named
                    945:       group. The method to identify a user belonging to a named group
                    946:       is not specified in this document.
                    947:       \begin{description}
                    948:       \item[name] name of the group.
                    949: 
                    950:       \item[only-before] the access condition is only valid before the
                    951:         given date (format: ``YYYY/MM/DD'').
                    952: 
                    953:       \item[only-after] the access condition is only valid after the
                    954:         given date (format: ``YYYY/MM/DD'').
                    955:       \end{description}
                    956:     
                    957:     \item[type=institution] access restricted to the members of this
                    958:       institution. The method to identify a user belonging to the
                    959:       institution is not specified in this document.
1.12      casties   960:       \begin{description}
1.16      casties   961:       \item[name] name of the institution.
                    962: 
                    963:       \item[only-before] the access condition is only valid before the
                    964:         given date (format: ``YYYY/MM/DD'').
                    965: 
                    966:       \item[only-after] the access condition is only valid after the
                    967:         given date (format: ``YYYY/MM/DD'').
                    968:       \end{description}
                    969:     
                    970: 
                    971:     \item[type=subnet] access restricted to all computers with an
                    972:       IP-address in this subnet.
                    973:       \begin{description}
                    974:       \item[range] subnet range defined in
                    975:       truncated-quad (e.g. ``141.14''), network-netmask
                    976:       (e.g. ``141.14.0.0/255.255.0.0''), or network-range
                    977:       (e.g. ``141.14.0.0/16'') notation.
                    978: 
                    979:       \item[only-before] the access condition is only valid before the
                    980:         given date (format: ``YYYY/MM/DD'').
                    981: 
                    982:       \item[only-after] the access condition is only valid after the
                    983:         given date (format: ``YYYY/MM/DD'').
                    984:       \end{description}
                    985:     
1.12      casties   986:         
1.16      casties   987:     \item[type=scientific] access to this resource should be restricted to
                    988:       scientific work
                    989:       \begin{description}
                    990:       \item[only-before] the access condition is only valid before the
                    991:         given date (format: ``YYYY/MM/DD'').
                    992: 
                    993:       \item[only-after] the access condition is only valid after the
                    994:         given date (format: ``YYYY/MM/DD'').
1.12      casties   995:       \end{description}
1.16      casties   996:     
1.12      casties   997: 
1.16      casties   998:     \item[type=free] access to this resource is not restricted
                    999:       \begin{description}
                   1000:       \item[only-before] the access condition is only valid before the
                   1001:         given date (format: ``YYYY/MM/DD'').
1.12      casties  1002: 
1.16      casties  1003:       \item[only-after] the access condition is only valid after the
                   1004:         given date (format: ``YYYY/MM/DD'').
                   1005:       \end{description}
                   1006:     
1.12      casties  1007:       
1.16      casties  1008:     \item[type=special] if none of the above conditions seems appropriate,
1.12      casties  1009:       a free-form text can be specified here.
1.16      casties  1010:       \begin{description}
                   1011:       \item[description] description of special access conditions.
                   1012: 
                   1013:       \item[only-before] the access condition is only valid before the
                   1014:         given date (format: ``YYYY/MM/DD'').
                   1015: 
                   1016:       \item[only-after] the access condition is only valid after the
                   1017:         given date (format: ``YYYY/MM/DD'').
                   1018:       \end{description}
                   1019:     
1.12      casties  1020:     \end{description}
                   1021:   \end{description}
                   1022: \end{description}
                   1023: 
                   1024: \noindent
1.16      casties  1025: It should be noted that control over access to the resource has to be
                   1026: provided by additional technical measures. Access conditions in the
                   1027: metadata file only state that conditions \emph{should} be observed; it
                   1028: is not implied that they \emph{are} necessarily observed, as the
                   1029: enforcement of conditions depends on additional measures.
1.12      casties  1030: 
                   1031: 
                   1032: 
                   1033: \subsection{Acquisition of raw-data}
                   1034: \label{sec:acqu-inform}
                   1035: 
                   1036: Information about the acquisition source for raw data resources can be
                   1037: provided in an \texttt{acquisition} container.
                   1038: 
                   1039: \begin{description}
                   1040: \item[acquisition] the acquisition source of this resource -- required
                   1041:   for raw data.
                   1042:   \begin{description}
                   1043:   \item[provider] where this resource came from -- required
                   1044:     \begin{description}
                   1045:     \item[name] free-text name of the provider (institution or
                   1046:       individual)
                   1047: 
                   1048:     \item[address] address of the provider
                   1049: 
                   1050:     \item[contact] contact person at the provider (i.e. name and email)
                   1051: 
                   1052:     \item[url] URL related to the provider
1.13      casties  1053: 
                   1054:     \item[provider-id] id of the provider (internally used) -- deduced
1.12      casties  1055:     \end{description}
                   1056: 
                   1057:   \item[date] date of acquisition -- required
                   1058: 
                   1059:   \item[description] free-text description of the acquisition source or
                   1060:   additional information
                   1061:   \end{description}
                   1062: \end{description}
                   1063: 
                   1064: 
                   1065: 
                   1066: \subsection{Documentary Films}
                   1067: \label{sec:documentary-films}
                   1068: 
                   1069: Documentary films can be described using a \texttt{film-acquisition}
                   1070: container.
                   1071: 
                   1072: \begin{description}
                   1073: \item[film-acquisition] description of a (documentary) film --
                   1074:   required for documentary film
                   1075:   \begin{description}
                   1076:   \item[recording] specification of the recording process
                   1077:     \begin{description}
                   1078:     \item[author] the person or persons doing the recording
                   1079: 
                   1080:     \item[date] the date or time span when the film was recorded
                   1081: 
                   1082:     \item[location] the place where the film was recorded
                   1083: 
                   1084:     \item[device] recording device used (e.g. ``Sony CP-DV8 Camcorder'')
                   1085:       
                   1086:     \item[format] format of the recorded film -- required (e.g. ``DV
                   1087:       720x524 25fps interlaced'')
                   1088:     \end{description}
                   1089:  
                   1090:   \item[description] free-form description of the recording and the
                   1091:     content of the film
                   1092:   \end{description}
                   1093: \end{description}
                   1094: 
                   1095: (More information about the digitization step could be added in a
                   1096: \texttt{digitization} tag similar to the \texttt{recording} tag.)
                   1097: 
1.1       casties  1098: 
                   1099: 
                   1100: 
1.4       casties  1101: \section{Sample metadata files for ECHO resources}
1.1       casties  1102: 
1.5       casties  1103: The following is a sample metadata index file for a directory containing a
                   1104: scanned document.
                   1105: 
                   1106: \begin{small}
1.1       casties  1107: \begin{verbatim}
1.11      casties  1108: <resource type="ECHO" version="1.0">
1.5       casties  1109:   <description>Fleck, 1980</description>
                   1110:   <name>fleck.1980</name>
                   1111:   <creator>University of Bern</creator>
                   1112:   <archive-path>ubern/wiss-theorie</archive-path>
                   1113:   <content-type>scanned images</content-type>
                   1114:   <meta>
                   1115:     <dri>echo23a45e2329x</dri>
                   1116:     <lang>ger</lang>
                   1117:     <bib type="book">
                   1118:       <author>Fleck, Ludwik</author>
                   1119:       <year>1980</year>
                   1120:       <title>Entstehung und Entwicklung einer 
                   1121:              wissenschaftlichen Tatsache</title>
                   1122:       <series-editor></series-editor>
                   1123:       <series-title></series-title>
                   1124:       <series-volume></series-volume>
                   1125:       <number-of-pages></number-of-pages>
                   1126:       <city>Frankfurt am Main</city>
                   1127:       <publisher>Suhrkamp</publisher>
                   1128:       <edition></edition>
                   1129:       <number-of-volumes></number-of-volumes>
                   1130:       <translator></translator>
                   1131:       <isbn-issn></isbn-issn>
                   1132:       <keywords>Wissenschaftstheorie, Fleck, Tatsache</keywords>
                   1133:       <abstract></abstract>
                   1134:     </bib>
                   1135:   </meta>
                   1136:   <dir>
                   1137:      <description>Scanned images (300dpi)</description>
                   1138:      <name>img</name>
                   1139:   </dir>
1.4       casties  1140: </resource>
                   1141: \end{verbatim}
1.5       casties  1142: \end{small}
1.4       casties  1143: 
1.5       casties  1144: The following is a sample metadata file for a single image of an
                   1145: architectural drawing.
1.4       casties  1146: 
1.5       casties  1147: \begin{small}
1.4       casties  1148: \begin{verbatim}
1.11      casties  1149: <resource type="ECHO" version="1.0">
1.5       casties  1150:   <creator>Bibliotheca Hertziana</creator>
                   1151:   <content-type>scanned images</content-type>
                   1152:   <file>
                   1153:     <name>00000271-asl-160-r-full.tif</name>
                   1154:     <meta>
                   1155:       <img>
                   1156:         <original-dpi>315</original-dpi>
                   1157:       </img>
                   1158:       <dri>echo45a67bc4367d</dri>
                   1159:       <lang>ita</lang>
                   1160:       <doc type="Architectural Drawing">
                   1161:         <person>Ciolli, Giacomo</person>
                   1162:         <person>Urban VIII; Barberini, Maffeo</person>
                   1163:         <location>Accademia di San Luca</location>
                   1164:         <location>Roma</location>
                   1165:         <date>1706</date>
                   1166:         <object>Concorso Clementino</object>
                   1167:         <object>Fontana Pubblica</object>
                   1168:         <object>Brunnen</object>
                   1169:         <object>ASL 160</object>
                   1170:         <keywords></keywords>
                   1171:       </doc>
                   1172:       <context>
                   1173:          <url>http://colosseum.biblhertz.it:8080/Lineamenta/
                   1174:          1033478408.39/1035196181.35/1035196204.09/1035394121.83
                   1175:          </url>
                   1176:       </context>
                   1177:     </meta>
                   1178:   </file>
1.2       casties  1179: </resource>
1.1       casties  1180: \end{verbatim}
1.5       casties  1181: \end{small}
1.1       casties  1182: 
                   1183: \end{document}
                   1184: 
                   1185: %%% Local Variables: 
                   1186: %%% mode: latex
                   1187: %%% TeX-master: t
                   1188: %%% End: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>