--- storage/meta/meta-format.tex 2003/07/23 20:34:53 1.5 +++ storage/meta/meta-format.tex 2003/09/11 14:52:43 1.11 @@ -7,16 +7,16 @@ %\usepackage{courier} % create in-text links black (with PDF) -%\usepackage[colorlinks=true,linkcolor=black]{hyperref} +\usepackage[colorlinks=true,linkcolor=black]{hyperref} % Format URLs nicely (without PDF) -\usepackage{url} +%\usepackage{url} \title{A simple metadata format for resource bundles} \author{Robert Casties, Dirk Wintergrün, Hans-Christoph Liess} -\date{V0.3 of \today} +\date{V1.0.3 of 11.9.2003} \begin{document} @@ -72,9 +72,18 @@ supplied by the provider of the resource automatic scripts later in the process, these elements must be present in the final file. -The outer container element is \texttt{resource}. Sub-types (``ECHO'', -``MPIWG'') can be specified if necessary with a \texttt{type} -parameter. Its sub-elements are: +The outer container element is \texttt{resource}. It has the following +\textbf{attributes}: + +\begin{description} +\item[type] sub-type of resource (e.g. ``ECHO'', + ``MPIWG'') -- optional. + +\item[version] version number of metadata format (currently 1.0) -- + required. +\end{description} + +\noindent The allowed \textbf{elements} inside \texttt{resource} are: \begin{description} \item[description] An informal textual description of the @@ -165,14 +174,15 @@ parameter. Its sub-elements are: resource's root directory (excluding the file itself) -- required (may be empty or omitted if the file is in the resource's root directory). + + \item[date] The file's modification or creation date\footnote{The + preferred time and date format is ``YYYY/MM/DD HH:MM:SS''}, + whichever is more recent -- optional. \item[modification-date] The file's modification date -- optional. \item[creation-date] The file's creation date -- optional. - - \item[date] The file's creation date if is has not been modified -- - optional. - + \item[size] The file size -- deduced. 
\item[mime-type] The file's mime-type -- optional. @@ -522,7 +532,7 @@ appear multiple times. \end{description} -\subsection{Information on the document structure (table of contents)} +\subsection{Document structure (table of contents)} \label{sec:toc} Information on the structure of a document like the division into @@ -605,7 +615,7 @@ tags. %%\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise} -\subsection{Information on scanned images} +\subsection{Scanned images} \label{sec:inform-scann-imag} Image files representing scanned images can have an \texttt{img} @@ -658,6 +668,70 @@ or \end{description} + +\subsection{Image acquisition} +\label{sec:inform-about-image} + +A description of the technology used in the process of producing a +digital image. + +\begin{description} +\item[image-acquisition] description of the image production process + \begin{description} + \item[device] acquisition device (e.g. ``flatbed scanner'') + + \item[image-type] type and color-depth of the image (e.g. ``RGB 24 + bit'') + + \item[postproduction] additional operations on the image + (e.g. ``sharpening, color correction'') + + \item[production-comment] additional textual information about the + production process + \end{description} +\end{description} + + +\subsection{Full text with images} +\label{sec:full-text-with} + +Full text in an XML format will be specified with a +\texttt{content-type} ``fulltext''. + +The relation between the full text and optional images of +whole pages or parts of pages must be specified in a +\texttt{text-tool} container. 
+ +\begin{description} +\item[text-tool] representation of full text with images + + \begin{description} + \item[text-file] the file name of the full text file (with path + inside document directory) + + \item[page-images] the directory name of the directory containing the + page image files (with path + inside document directory) + + \item[xslt-file] the file name of an additional XSL transformation + file + + \item[text-config] container for configuration options + \begin{description} + \item[container-tag] the name of the text root element (default + ``text'') + + \item[ref-element-tag] the name of the element that is used as + unit of reference when results are presented + + \item[pagebreak-tag] the name of the element that indicates page + breaks (default ``pb'') + \end{description} + \end{description} +\end{description} + + + \subsection{Access restrictions} \label{sec:access-restrictions} @@ -674,7 +748,7 @@ scanned document. \begin{small} \begin{verbatim} - + Fleck, 1980 fleck.1980 University of Bern @@ -715,7 +789,7 @@ architectural drawing. \begin{small} \begin{verbatim} - + Bibliotheca Hertziana scanned images