File:  [Repository] / storage / meta / meta-format.tex
Revision 1.3: download - view: text, annotated - select for diffs - revision graph
Tue Jul 1 17:51:40 2003 UTC (21 years ago) by casties
Branches: MAIN
CVS tags: HEAD
clarified <path> element

    1: \documentclass[a4paper]{article}
    2: 
    3: \usepackage[latin1]{inputenc}
    4: \usepackage[T1]{fontenc}
    5: \usepackage{ae}
    6: %\usepackage{times}
    7: %\usepackage{courier}
    8: 
    9: % create in-text links black (with PDF)
   10: \usepackage[colorlinks=true,linkcolor=black]{hyperref}
   11: % Format URLs nicely (without PDF)
   12: %\usepackage{url}
   13: 
   14: 
   15: \title{A simple metadata format for resource bundles}
   16: 
   17: \author{Robert Casties, Dirk Wintergrün, Christoph Liess}
   18: 
   19: \date{V0.2.2 of \today}
   20: 
   21: \begin{document}
   22: 
   23: \maketitle
   24: 
   25: \tableofcontents
   26: 
   27: 
   28: \section{File and directory names}
   29: \label{sec:file-directory-names}
   30: 
   31: File and directory names should not contain spaces. Allowed characters
   32: in filenames are only the alphanumeric set a-z, A-Z, 0-9, hyphen
   33: ``-'', underscore ``\_'' and dot ``.''.
   34: 
   35: File and directory paths in the metadata file use the conventional
   36: Unix file separator slash ``/''.
   37: 
   38: \section{Resource format}
   39: \label{sec:mpiwg-doc}
   40: 
   41: In this description elements marked ``optional'' need not be supplied
   42: by the provider of the resource and may be absent in all versions of
   43: the metadata file. Elements marked ``required'' must be supplied by
   44: the provider of the resource. Elements marked ``deduced'' can be
   45: supplied by the provider of the resource but can also be provided by
   46: automatic scripts later in the process; in either case these elements
   47: must be present in the final file.
   48: 
   49: The outer container is named \texttt{resource}. Sub-types (``ECHO'',
   50: ``MPIWG'') can be specified if necessary with a \texttt{type}
   51: parameter. Its sub-elements are:
   52: 
   53: \begin{description}
   54: \item[description] An informal textual description of the
   55:   resource -- optional.
   56: 
   57: \item[name] The filename of the resource (name of the directory this
   58:   file is contained in) -- required.
   59: 
   60: \item[creator] The name of the project or person that created the
   61:   resource -- optional.
   62: 
   63: \item[archive-creation-date] The time and date the archive was created
   64:   -- deduced.
   65: 
   66: \item[archive-path] The full path to the resource directory inside the
   67:   whole archive collection -- deduced.
   68:   
   69: \item[derived-from] Container for the description of the original
   70:   resource if this resource is a modified version of another resource
   71:   -- optional.
   72: 
   73:   \begin{description}
   74:   \item[archive-path] The full path to the original resource
   75:     -- required.
   76: 
   77:   \item[description] An informal textual description of the relation
   78:   of this resource to the original resource -- optional.
   79:   \end{description}
   80:   
   81: \item[linked-with] Container for the description of another
   82:   resource when this resource is a linked copy of another resource
   83:   -- optional.
   84: 
   85:   \begin{description}
   86:   \item[archive-path] The full path to the linked resource
   87:     -- required.
   88: 
   89:   \item[description] An informal textual description of the relation
   90:   of this resource to the linked resource -- optional.
   91:   \end{description}
   92:   
   93: \item[content-type] The content type of this resource -- required.\\
   94:   The content type enables the choice of tools to manipulate and
   95:   display the resource. There should be a common list of content
   96:   types. For digital documents (books, manuscripts) this would be
   97:   ``scanned document'', for other image data ``scanned
   98:   images''.\footnote{The criterion for documents is an ordered
   99:     succession of image files (pages) and equal image size and
  100:     resolution throughout the images of a resource.}
  101:   
  102: \item[meta] Additional metadata information about the resource --
  103:   optional.\\ For a description of additional metadata see below.
  104: 
  105: \item[dir] Container for the description of a subdirectory -- required
  106:   (when there are subdirectories).\\ \texttt{dir} tags should not be
  107:   nested. Directories at lower levels are identified by their
  108:   \texttt{path}.
  109: 
  110:   \begin{description}
  111:   \item[description] An informal textual description of the
  112:     subdirectory -- optional.
  113: 
  114:   \item[name] The name of the subdirectory -- required.
  115:     
  116:   \item[path] The directory path of this subdirectory relative to the
  117:     resource's root directory (containing the directory itself) --
  118:     required (may be identical to \texttt{name} or omitted if the
  119:     directory is a direct child of the resource's root directory).
  120:     
  121:   \item[meta] Additional metadata information about the directory --
  122:     optional.\\ For a description of additional metadata see below.
  123:   \end{description}
  124:   
  125: \item[file] Container for the description of a file -- deduced.\\
  126:   \texttt{file} tags should not be nested in \texttt{dir} tags. Files
  127:   at lower directory levels are identified by their \texttt{path}.
  128: 
  129:   \begin{description}
  130:   \item[description] An informal textual description of the
  131:     file -- optional.
  132: 
  133:   \item[name] The name of the file -- required.
  134:     
  135:   \item[path] The directory path of this file relative to the
  136:     resource's root directory (containing the file itself) -- required
  137:     (may be identical to \texttt{name} or omitted if the file is in the
  138:     resource's root directory).
  139: 
  140:   \item[modification-date] The file's modification date -- optional.
  141: 
  142:   \item[creation-date] The file's creation date -- optional.
  143: 
  144:   \item[date] The file's creation date if it has not been modified --
  145:     optional.
  146: 
  147:   \item[size] The file size -- deduced.
  148:     
  149:   \item[mime-type] The file's mime-type -- optional.
  150: 
  151:   \item[md5cs] MD5 checksum of the file content -- optional.
  152:     
  153:   \item[meta] Additional metadata information about the file --
  154:     optional. For a description of additional metadata see below.
  155:   \end{description}
  156:   
  157: \end{description}
  158: 
  159: 
  160: 
  161: \section{Additional metadata}
  162: \label{sec:additional-metadata}
  163: 
  164: All elements with \texttt{meta} tags can contain an arbitrary number
  165: of additional metadata elements.
  166: 
  167: 
  168: \subsection{DRI}
  169: \label{sec:dri}
  170: 
  171: The \emph{digital resource identifier} for the resource is specified
  172: with a \texttt{dri} tag. Digital resource identifiers are documented
  173: on the page
  174: 
  175: \url{http://pythia.mpiwg-berlin.mpg.de/projects/standards/dri}.
  176: 
  177: 
  178: \subsection{Bibliographic information}
  179: \label{sec:bibliographic-data}
  180: 
  181: Bibliographic information in the format of the ECHO scheme for
  182: bibliographic data (cf.\ content workflow) or the MPIWG
  183: ``Projektbibliografie'' is presented in a \texttt{bib} container with
  184: a \texttt{type} parameter, giving the type of bibliographic resource.
  185: The \texttt{type} field is repeated as a tag in the container. The
  186: tags have the variable ``human-readable'' field names.
  187: 
  188: 
  189: \subsection{Information on the document structure (table of contents)}
  190: \label{sec:toc}
  191: 
  192: Document structure information like a table of contents for a scanned
  193: document is presented in a \texttt{toc} container. The format to be
  194: used has to be further specified. The format could be based on the
  195: so-called ``LiSe-XML'' format. For a detailed description and an
  196: exemplary set of TOC information see:
  197: 
  198: \url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
  199: 
  200: \url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TSlise/lise_downloads/deimel1929.xml}
  201: 
  202: 
  203: \subsection{Information on scanned images}
  204: \label{sec:inform-scann-imag}
  205: 
  206: Image files representing scanned images can have an \texttt{img}
  207: container tag with information about the scan resolution and the size
  208: of the original image. This information is used by the
  209: \texttt{digilib} image viewing tool.
  210: 
  211: One of three possible sets of tags is required:
  212: 
  213: \begin{description}
  214: \item[original-size-x] The width of the original image. The unit of
  215:   measure can be contained as parameter \texttt{unit}, the default is
  216:   meter ``m''. The width to be considered is the total width of the
  217:   scanned area.
  218: 
  219: \item[original-size-y] The height of the original image.
  220: 
  221: \item[original-pixel-x] The width of the hi-res scan in pixels.
  222: 
  223: \item[original-pixel-y] The height of the hi-res scan in pixels.
  224: \end{description}
  225: 
  226: or
  227: 
  228: \begin{description}
  229: \item[original-dpi-x] The resolution of the hi-res scan in its width
  230:   in pixels per inch.
  231: 
  232: \item[original-dpi-y] The resolution of the hi-res scan in its height
  233:   in pixels per inch.
  234: \end{description}
  235: 
  236: or
  237: 
  238: \begin{description}
  239: \item[original-dpi] The resolution of the hi-res scan in pixels per
  240:   inch if the resolutions in width and height are the same.
  241: \end{description}
  242: 
  243: 
  244: \subsection{Access restrictions}
  245: \label{sec:access-restrictions}
  246: 
  247: If the access to a resource is restricted for technical or legal
  248: reasons then the restrictions can be put in an
  249: \texttt{access-restrictions} container. The format of the information
  250: inside the container has to be further specified.
  251: 
  252: 
  253: \section{Sample metadata file for an ECHO resource}
  254: 
  255: The following is the sample structure for a scanned document resource.
  256: 
  257: \begin{verbatim}
  258: <resource type="ECHO">
  259:     <description></description>
  260:     <name>fleck.1980</name>
  261:     <creator>University of Bern</creator>
  262:     <archive-creation-date></archive-creation-date>
  263:     <archive-path>ubern/wiss-theorie</archive-path>
  264:     <content-type>scanned document</content-type>
  265:     <meta>
  266:         <dri>echo23a45e2329x</dri>
  267:         <bib type="book">
  268:             <author>Fleck, Ludwik</author>
  269:             <year>1980</year>
  270:             <title>Entstehung und Entwicklung einer 
  271:                    wissenschaftlichen Tatsache</title>
  272:             <series_editor></series_editor>
  273:             <series_title></series_title>
  274:             <series_volume></series_volume>
  275:             <number_of_pages></number_of_pages>
  276:             <city>Frankfurt am Main</city>
  277:             <publisher>Suhrkamp</publisher>
  278:             <edition></edition>
  279:             <number_of_volumes></number_of_volumes>
  280:             <translator></translator>
  281:             <isbn></isbn>
  282:             <keywords>Wissenschaftstheorie, Fleck, Tatsache</keywords>
  283:             <abstract></abstract>
  284:         </bib>
  285:     </meta>
  286:     <dir>
  287:          <description>Scanned images (300dpi)</description>
  288:          <name>img</name>
  289:          <path></path>
  290:          <meta></meta>
  291:     </dir>
  292: </resource>
  293: \end{verbatim}
  294: 
  295: \end{document}
  296: 
  297: %%% Local Variables: 
  298: %%% mode: latex
  299: %%% TeX-master: t
  300: %%% End: 
  301: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>