Annotation of storage/meta/meta-format.tex, revision 1.3

1.1       casties     1: \documentclass[a4paper]{article}
                      2: 
                      3: \usepackage[latin1]{inputenc}
                      4: \usepackage[T1]{fontenc}
                      5: \usepackage{ae}
                      6: %\usepackage{times}
                      7: %\usepackage{courier}
                      8: 
                      9: % create in-text links black (with PDF)
                     10: \usepackage[colorlinks=true,linkcolor=black]{hyperref}
                     11: % Format URLs nicely (without PDF)
                     12: %\usepackage{url}
                     13: 
                     14: 
                     15: \title{A simple metadata format for resource bundles}
                     16: 
                     17: \author{Robert Casties, Dirk Wintergrün, Christoph Liess}
                     18: 
1.3     ! casties    19: \date{V0.2.2 of \today}
1.1       casties    20: 
                     21: \begin{document}
                     22: 
                     23: \maketitle
                     24: 
                     25: \tableofcontents
                     26: 
                     27: 
                     28: \section{File and directory names}
                     29: \label{sec:file-directory-names}
                     30: 
                     31: File and directory names should not contain spaces. Allowed characters
                     32: in filenames are only the alphanumeric set a-z, A-Z, 0-9, hyphen
                     33: ``-'', underscore ``\_'' and dot ``.''.
                     34: 
                     35: File and directory paths in the metadata file use the conventional
                     36: Unix file separator slash ``/''.
                     37: 
                     38: \section{Resource format}
                     39: \label{sec:mpiwg-doc}
                     40: 
                     41: In this description elements marked ``optional'' need not be supplied
                     42: by the provider of the resource and may be absent in all versions of
                     43: the metadata file. Elements marked ``required'' must be supplied by
                     44: the provider of the resource. Elements marked ``deduced'' can be
                     45: supplied by the provider of the resource but can also be provided by
                     46: automatic scripts later in the process; these elements must be present
                     47: in the final file.
                     48: 
                     49: The outer container is named \texttt{resource}. Sub-types (``ECHO'',
                     50: ``MPIWG'') can be specified if necessary with a \texttt{type}
                     51: parameter. Its sub-elements are:
                     52: 
                     53: \begin{description}
                     54: \item[description] An informal textual description of the
                     55:   resource -- optional.
                     56: 
                     57: \item[name] The filename of the resource (name of the directory this
                     58:   file is contained in) -- required.
                     59: 
                     60: \item[creator] The name of the project or person that created the
                     61:   resource -- optional.
                     62: 
                     63: \item[archive-creation-date] The time and date the archive was created
                     64:   -- deduced.
                     65: 
                     66: \item[archive-path] The full path to the resource directory inside the
                     67:   whole archive collection -- deduced.
                     68:   
                     69: \item[derived-from] Container for the description of the original
                     70:   resource if this resource is a modified version of another resource
                     71:   -- optional.
                     72: 
                     73:   \begin{description}
                     74:   \item[archive-path] The full path to the original resource
                     75:     -- required.
                     76: 
                     77:   \item[description] An informal textual description of the relation
                     78:   of this resource to the original resource -- optional.
                     79:   \end{description}
                     80:   
                     81: \item[linked-with] Container for the description of another
                     82:   resource when this resource is a linked copy of another resource
                     83:   -- optional.
                     84: 
                     85:   \begin{description}
                     86:   \item[archive-path] The full path to the linked resource
                     87:     -- required.
                     88: 
                     89:   \item[description] An informal textual description of the relation
                     90:   of this resource to the linked resource -- optional.
                     91:   \end{description}
                     92:   
                     93: \item[content-type] The content type of this resource -- required.\\
                     94:   The content type enables the choice of tools to manipulate and
                     95:   display the resource. There should be a common list of content
                     96:   types. For digital documents (books, manuscripts) this would be
                     97:   ``scanned document'', for other image data ``scanned
                     98:   images''.\footnote{The criterion for documents is an ordered
                     99:     succession of image files (pages) and equal image size and
                    100:     resolution throughout the images of a resource.}
                    101:   
                    102: \item[meta] Additional metadata information about the resource --
                    103:   optional.\\ For a description of additional metadata see below.
                    104: 
                    105: \item[dir] Container for the description of a subdirectory -- required
                    106:   (when there are subdirectories).\\ \texttt{dir} tags should not be
                    107:   nested. Directories at lower levels are identified by their
                    108:   \texttt{path}.
                    109: 
                    110:   \begin{description}
                    111:   \item[description] An informal textual description of the
                    112:     subdirectory -- optional.
                    113: 
                    114:   \item[name] The name of the subdirectory -- required.
                    115:     
                    116:   \item[path] The directory path of this subdirectory relative to the
1.3     ! casties   117:     resource's root directory (containing the directory itself) --
        !           118:     required (may be identical to \texttt{name} or omitted if the
        !           119:     directory is a direct child of the resource's root directory).
1.1       casties   120:     
                    121:   \item[meta] Additional metadata information about the directory --
                    122:     optional.\\ For a description of additional metadata see below.
                    123:   \end{description}
                    124:   
                    125: \item[file] Container for the description of a file -- deduced.\\
                    126:   \texttt{file} tags should not be nested in \texttt{dir} tags. Files
                    127:   at lower directory levels are identified by their \texttt{path}.
                    128: 
                    129:   \begin{description}
                    130:   \item[description] An informal textual description of the
                    131:     file -- optional.
                    132: 
                    133:   \item[name] The name of the file -- required.
                    134:     
                    135:   \item[path] The directory path of this file relative to the
1.3     ! casties   136:     resource's root directory (containing the file itself) -- required
        !           137:     (may be identical to \texttt{name} or omitted if the file is in the
        !           138:     resource's root directory).
1.1       casties   139: 
                    140:   \item[modification-date] The file's modification date -- optional.
                    141: 
                    142:   \item[creation-date] The file's creation date -- optional.
                    143: 
                    144:   \item[date] The file's creation date if it has not been modified --
                    145:     optional.
                    146: 
                    147:   \item[size] The file size -- deduced.
                    148:     
                    149:   \item[mime-type] The file's mime-type -- optional.
                    150: 
                    151:   \item[md5cs] MD5 checksum of the file content -- optional.
                    152:     
                    153:   \item[meta] Additional metadata information about the file --
                    154:     optional. For a description of additional metadata see below.
                    155:   \end{description}
                    156:   
                    157: \end{description}
                    158: 
                    159: 
                    160: 
                    161: \section{Additional metadata}
                    162: \label{sec:additional-metadata}
                    163: 
                    164: All elements with \texttt{meta} tags can contain an arbitrary number
                    165: of additional metadata elements.
                    166: 
                    167: 
                    168: \subsection{DRI}
                    169: \label{sec:dri}
                    170: 
                    171: The \emph{digital resource identifier} for the resource is specified
                    172: with a \texttt{dri} tag. Digital resource identifiers are documented
                    173: on the page
                    174: 
                    175: \url{http://pythia.mpiwg-berlin.mpg.de/projects/standards/dri}.
                    176: 
                    177: 
                    178: \subsection{Bibliographic information}
                    179: \label{sec:bibliographic-data}
                    180: 
                    181: Bibliographic information in the format of the ECHO scheme for
                    182: bibliographic data (cf.\ content workflow) or the MPIWG
                    183: ``Projektbibliografie'' is presented in a \texttt{bib} container with
                    184: a \texttt{type} parameter, giving the type of bibliographic resource.
                    185: The \texttt{type} field is repeated as a tag in the container. The
                    186: tags have the variable ``human-readable'' field names.
                    187: 
                    188: 
                    189: \subsection{Information on the document structure (table of contents)}
                    190: \label{sec:toc}
                    191: 
                    192: Document structure information like a table of contents for a scanned
                    193: document is presented in a \texttt{toc} container. The format to be
                    194: used has to be further specified. The format could be based on the
                    195: so-called ``LiSe-XML'' format. For a detailed description and an
                    196: exemplary set of TOC information see:
                    197: 
                    198: \url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
                    199: 
                    200: \url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TSlise/lise_downloads/deimel1929.xml}
                    201: 
                    202: 
                    203: \subsection{Information on scanned images}
                    204: \label{sec:inform-scann-imag}
                    205: 
                    206: Image files representing scanned images can have an \texttt{img}
                    207: container tag with information about the scan resolution and the size
                    208: of the original image. This information is used by the
                    209: \texttt{digilib} image viewing tool.
                    210: 
                    211: One of the following three sets of tags is required:
                    212: 
                    213: \begin{description}
                    214: \item[original-size-x] The width of the original image. The unit of
                    215:   measure can be contained as parameter \texttt{unit}, the default is
                    216:   meter ``m''. The width to be considered is the total width of the
                    217:   scanned area.
                    218: 
                    219: \item[original-size-y] The height of the original image.
                    220: 
                    221: \item[original-pixel-x] The width of the hi-res scan in pixels.
                    222: 
                    223: \item[original-pixel-y] The height of the hi-res scan in pixels.
                    224: \end{description}
                    225: 
                    226: or
                    227: 
                    228: \begin{description}
                    229: \item[original-dpi-x] The resolution of the hi-res scan in its width
                    230:   in pixels per inch.
                    231: 
                    232: \item[original-dpi-y] The resolution of the hi-res scan in its height
                    233:   in pixels per inch.
                    234: \end{description}
                    235: 
                    236: or
                    237: 
                    238: \begin{description}
                    239: \item[original-dpi] The resolution of the hi-res scan in pixels per
                    240:   inch if the resolutions in width and height are the same.
                    241: \end{description}
                    242: 
                    243: 
                    244: \subsection{Access restrictions}
                    245: \label{sec:access-restrictions}
                    246: 
                    247: If the access to a resource is restricted for technical or legal
                    248: reasons then the restrictions can be put in an
                    249: \texttt{access-restrictions} container. The format of the information
                    250: inside the container has to be further specified.
                    251: 
                    252: 
                    253: \section{Sample metadata file for an ECHO resource}
                    254: 
                    255: The following is the sample structure for a scanned document resource.
                    256: 
                    257: \begin{verbatim}
                    258: <resource type="ECHO">
                    259:     <description></description>
                    260:     <name>fleck.1980</name>
                    261:     <creator>University of Bern</creator>
                    262:     <archive-creation-date></archive-creation-date>
                    263:     <archive-path>ubern/wiss-theorie</archive-path>
                    264:     <content-type>scanned images</content-type>
                    265:     <meta>
                    266:         <dri>echo23a45e2329x</dri>
                    267:         <bib type="book">
                    268:             <author>Fleck, Ludwik</author>
                    269:             <year>1980</year>
                    270:             <title>Entstehung und Entwicklung einer 
                    271:                    wissenschaftlichen Tatsache</title>
                    272:             <series_editor></series_editor>
                    273:             <series_title></series_title>
                    274:             <series_volume></series_volume>
                    275:             <number_of_pages></number_of_pages>
                    276:             <city>Frankfurt am Main</city>
                    277:             <publisher>Suhrkamp</publisher>
                    278:             <edition></edition>
                    279:             <number_of_volumes></number_of_volumes>
                    280:             <translator></translator>
                    281:             <isbn></isbn>
                    282:             <keywords>Wissenschaftstheorie, Fleck, Tatsache</keywords>
                    283:             <abstract></abstract>
                    284:         </bib>
                    285:     </meta>
                    286:     <dir>
                    287:          <description>Scanned images (300dpi)</description>
                    288:          <name>img</name>
                    289:          <path></path>
                    290:          <meta></meta>
                    291:     </dir>
1.2       casties   292: </resource>
1.1       casties   293: \end{verbatim}
                    294: 
                    295: \end{document}
                    296: 
                    297: %%% Local Variables: 
                    298: %%% mode: latex
                    299: %%% TeX-master: t
                    300: %%% End: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>