--- storage/meta/meta-format.tex 2003/07/01 17:51:40 1.3
+++ storage/meta/meta-format.tex 2003/07/23 10:35:06 1.4
@@ -7,16 +7,16 @@
%\usepackage{courier}
% create in-text links black (with PDF)
-\usepackage[colorlinks=true,linkcolor=black]{hyperref}
+%\usepackage[colorlinks=true,linkcolor=black]{hyperref}
% Format URLs nicely (without PDF)
-%\usepackage{url}
+\usepackage{url}
\title{A simple metadata format for resource bundles}
-\author{Robert Casties, Dirk Wintergrün, Christoph Liess}
+\author{Robert Casties, Dirk Wintergrün, Hans-Christoph Liess}
-\date{V0.2.2 of \today}
+\date{V0.3pre2 of \today}
\begin{document}
@@ -35,6 +35,32 @@ in filenames are only the alphanumeric s
File and directory paths in the metadata file use the conventional
Unix file separator slash ``/''.
+
+\section{Metadata files}
+\label{sec:metadata-files}
+
+The metadata information is stored in the XML format documented below
+in special files in the resource directory. Two forms of metadata
+files are possible:
+\begin{itemize}
+\item a file named \texttt{index.meta} in a directory.
+
+\item a file named like the data file it describes with an
+ additional extension \texttt{.meta}. For example, metadata for the
+ file \texttt{0001.tif} would be in a file \texttt{0001.tif.meta}.
+\end{itemize}
+
+The resource directory must contain an \texttt{index.meta} file with
+information about the resource as a whole. Other directories can
+contain \texttt{index.meta} files.
+
+Additional information about single data files that are part of the
+resource can either be put in \texttt{file} tags in the
+\texttt{index.meta} file or in separate \emph{filename}\texttt{.meta}
+files for each data file. Information from the directory level file is
+inherited at the file level.
+
+
\section{Resource format}
\label{sec:mpiwg-doc}
@@ -43,10 +69,10 @@ by the provider of the resource and may
the metadata file. Elements marked ``required'' must be supplied by
the provider of the resource. Elements marked ``deduced'' can be
supplied by the provider of the resource but can also be provided by
-automatic scripts later in the process, the elements must be present
+automatic scripts later in the process; these elements must be present
in the final file.
-The outer container is named \texttt{resource}. Sub-types (``ECHO'',
+The outer container element is \texttt{resource}. Sub-types (``ECHO'',
``MPIWG'') can be specified if necessary with a \texttt{type}
parameter. Its sub-elements are:
@@ -59,9 +85,12 @@ parameter. Its sub-elements are:
\item[creator] The name of the project or person that created the
resource -- optional.
+
+\item[archive-creation-date] The time and date the archive collection
+ was created -- deduced.
-\item[archive-creation-date] The time and date the archive was created
- -- deduced.
+\item[archive-storage-date] The time and date the archive was written
+ to permanent storage -- deduced (must not be set by the user).
\item[archive-path] The full path to the resource directory inside the
whole archive collection -- deduced.
@@ -164,17 +193,45 @@ parameter. Its sub-elements are:
All elements with \texttt{meta} tags can contain an arbitrary number
of additional metadata elements.
+\subsection{Language}
+\label{sec:lang}
+
+The language of a resource (e.g. a text) can be specified with a
+\texttt{lang} tag. Languages have to be described using the
+international codes for the representation of names of languages
+either in two-letter form (ISO 639-1) or in three-letter form (ISO
+639-2). The entire catalogue of languages is documented on the page
+
+\url{http://www.loc.gov/standards/iso639-2/englangn.html}
+
\subsection{DRI}
\label{sec:dri}
The \emph{digital resource identifier} for the resource is specified
-with a \texttt{dri} tag. Digital resource identifiers are documented
+in a \texttt{dri} element. Digital resource identifiers are documented
on the page
\url{http://pythia.mpiwg-berlin.mpg.de/projects/standards/dri}.
+
+\subsection{Collection context}
+\label{sec:collection-context}
+
+The context of a resource as part of a collection or part of a project can be
+specified in the \texttt{context} element:
+
+\begin{description}
+\item[link] URL to additional context information.
+
+\item[name] Textual description of project or collection.
+\end{description}
+\noindent Multiple \texttt{link} or \texttt{name} elements are
+possible.
+
+
+
\subsection{Bibliographic information}
\label{sec:bibliographic-data}
@@ -182,22 +239,327 @@ Bibliographic information in the format
bibliographic data (cf. content workflow) or the MPIWG
``Projektbibliografie'' is presented in a \texttt{bib} container with
a \texttt{type} parameter, giving the type of bibliographic resource.
-The \texttt{type} field is repeated as a tag in the container. The
-tags have the variable ``human-readable'' field names.
+The \texttt{type} field can be repeated as a tag in the container.
+
+
+\subsubsection{Book}
+
+\begin{description}
+
+\item [bib type="book"] a published book.
+
+ \begin{description}
+ \item [author] The author of the book.
+ \item [year] The year of publication.
+ \item [title] Title of the book.
+ \item [series-editor] Name of the series editor, if the book appears
+ in a series.
+ \item [series-title] Title of the series, if the book appears in a
+ series.
+ \item [series-volume] Volume number, if the book appears in a
+ series.
+ \item [number-of-pages] Number of pages of the entire book.
+ \item [city] City where the book was published.
+ \item [publisher] Name of the publishing company
+ \item [edition] Edition of the book (e.g. third edition)
+ \item [number-of-volumes] Number of volumes, if the book is
+ published in multiple volumes.
+ \item [translator] Name of the translator.
+ \item [isbn-issn]
+ \end{description}
+\end{description}
+
+\subsubsection{In Book}
+
+\begin{description}
+\item [bib type="inbook"] an article as part of a book.
+
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [editor] Name of the book's editor.
+ \item [book-title] Title of the book.
+ \item [series-volume] Volume number, if the book appears in a
+ series.
+ \item [pages] Number of pages of the article.
+ \item [city] City where the book was published.
+ \item [publisher] Name of the publishing company
+ \item [edition] Edition of the book (e. g. third edition)
+ \item [series-author] Name of the series editor, if the book appears
+ in a series.
+ \item [series-title] Title of the series, if the book appears in a
+ series.
+ \item [number-of-volumes] Number of volumes, if the book is
+ published in multiple volumes.
+ \item [translator] Name of the translator
+ \item [isbn-issn]
+ \end{description}
+\end{description}
+
+\subsubsection{Proceedings}
+
+\begin{description}
+\item [bib type="proceedings"] a conference proceedings publication.
+
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [editor] Name of the book's editor.
+ \item [conference-name] Name of the conference the proceedings are
+ related to.
+ \item [volume] Volume number.
+ \item [pages] Number of pages of the article.
+ \item [date] Date of the conference the proceedings are related to.
+ \item [conference-location] City where the conference was held.
+ \item [publisher] Name of the publishing company
+ \item [edition] Edition of the book (e. g. third edition)
+ \item [series-editor] Name of the series editor, if the book appears
+ in a series.
+ \item [series-title] Title of the series, if the book appears in a
+ series.
+ \item [number-of-volumes] Number of volumes, if the book is
+ published as multiple volumes.
+ \item [isbn-issn]
+ \end{description}
+\end{description}
+
+\subsubsection{Edited Book}
+
+\begin{description}
+\item[bib type="edited-book"] a book that is the edition of another
+ work.
+
+ \begin{description}
+ \item [editor] Name of the editor of the book.
+ \item [year] The year of publication.
+ \item [title] Title of the book.
+ \item [series-editor] Name of the editor of the series the book is
+ part of.
+ \item [series-title] Title of the series, if the book is part of a
+ series.
+ \item [series-volume] Volume number, if the book appears in a series.
+ \item [number-of-pages] Number of pages of the book.
+ \item [city] City where the book was published.
+ \item [publisher] Name of the publishing company
+ \item [edition] Information about the edition (e.g. ``Repr. of the London ed. 1652'')
+ \item [number-of-volumes] Number of volumes, if the book is
+ published as multiple volumes.
+ \item [isbn-issn]
+ \end{description}
+\end{description}
+
+\subsubsection{Journal Article}
+
+\begin{description}
+\item [bib type="journal-article"] an article in a scientific journal.
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [journal] Name of the journal.
+ \item [volume] Volume number, if the journal appears in a series.
+ \item [issue] Number of the issue the article is part of.
+ \item [pages] Number of pages of the article.
+ \item [alternate-journal] Alternate Journal
+ \item [isbn-issn]
+ \end{description}
+\end{description}
+
+\subsubsection{Magazine Article}
+
+\begin{description}
+\item [bib type="magazine-article"] an article in a popular magazine.
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [magazine] Name of the magazine.
+ \item [volume] Volume number, if the book appears in a series.
+ \item [issue-number] Number of the issue the article is part of.
+ \item [pages] Number of pages of the article.
+ \item [date] Date when the article appeared.
+ \end{description}
+\end{description}
+
+\subsubsection{Newspaper Article}
+
+\begin{description}
+\item [bib type="newspaper-article"] an article in a newspaper.
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [newspaper] Name of the newspaper the article appeared in.
+ \item [pages] Number of pages of the article.
+ \item [issue-date] Date of the issue the article is part of.
+ \item [city] City of the newspaper.
+ \end{description}
+\end{description}
+
+\subsubsection{Thesis}
+
+\begin{description}
+\item [bib type="thesis"] a master/doctorate/etc. thesis.
+ \begin{description}
+ \item [author] The author of the thesis.
+ \item [year] The year of publication.
+ \item [title] Title of the thesis.
+ \item [academic-department] Name of the academic department where
+ the thesis was handed in.
+ \item [number-of-pages] Number of pages of the thesis.
+ \item [city] City where the thesis was published.
+ \item [university] Name of the university where the thesis was
+ handed in.
+ \item [isbn-issn]
+ \end{description}
+\end{description}
+
+\subsubsection{Report}
+
+\begin{description}
+\item [bib type="report"] a scientific report.
+ \begin{description}
+ \item [author] The author of the report.
+ \item [year] The year of publication.
+ \item [title] Title of the report.
+ \item [pages] Number of pages of the report.
+ \item [date] Date when the report appeared.
+ \item [city] City where the report was published.
+ \item [institution] Institution where the report was produced.
+ \item [type] Type of report.
+ \item [report-number] Report number.
+ \end{description}
+\end{description}
+
+\subsubsection{Generic}
+
+\begin{description}
+\item [bib type="generic"] a generic bibliographic type. This type
+ should only be used in rare cases.
+ \begin{description}
+ \item [author]
+ \item [year]
+ \item [title]
+ \item [secondary-author]
+ \item [secondary-title]
+ \item [volume]
+ \item [number]
+ \item [pages]
+ \item [date]
+ \item [place-published]
+ \item [publisher]
+ \item [edition]
+ \item [tertiary-author]
+ \item [tertiary-title]
+ \item [number-of-volumes]
+ \item [type-of-work]
+ \item [subsidiary-author]
+ \item [alternate-title]
+ \item [isbn-issn]
+ \item [call-number]
+ \item [label]
+ \item [keywords]
+ \item [abstract]
+ \item [notes]
+ \item [url]
+\end{description}
+\end{description}
+
+
+\subsection{Architectural drawings}
+\label{sec:doc}
+
+Specific information for architectural drawings is presented in a
+\texttt{doc} container. All elements can appear multiple times.
+
+\begin{description}
+\item [person] last name and first name of a person, separated by a
+ comma. A further common name for the person can be put in front,
+ separated by a semicolon.
+\item [location] Name of a place in its common notation. This can
+ be a city or an institution.
+\item [date] This can be a year (or several years, separated by commas) or a period
+ (1706-1714). Years are noted with four digits.
+\item [object] Short description of an object or signatures.
+\item [keywords] Keywords related to the object.
+\end{description}
\subsection{Information on the document structure (table of contents)}
\label{sec:toc}
-Document structure information like a table of contents for a scanned
-document is presented in a \texttt{toc} container. The format to be
-used has to be further specified. The format could be based on the so
-called ``LiSe-XML'' format. For a detailed description and an
-exemplary set of TOC information see:
+Information on the structure of a document like the division into
+parts and chapters in the way of a table of contents is presented in a
+\texttt{toc} container.
+
+The scheme allows multiple logical pages on a single page image
+as is often the case with scanned books or manuscripts. The scheme
+also allows for ``loose'' numbering schemes with roman, arabic or
+other page numbers consecutively or mixed and changes in the numbering
+within the document.
+
+The flexibility comes from the fact that no additional assumptions
+about the mapping between logical pages and page images are made in
+the format. All mapping information is specified by the user.
+
+The logical page numbering or naming that can be presented to the user
+is specified in the \texttt{name} tags while the physical numbering of
+the page images is specified in the \texttt{index} or \texttt{url}
+tags.
+
+\begin{description}
+\item[page] describes a single logical page
+ \begin{description}
+ \item[name] the ``name'' of the logical page. This can be any string
+ like a page number (arabic, roman, etc.) or a special designation
+ like ``Table 5''.
+
+ \item[index] the \texttt{digilib} index number\footnote{The index
+ number for digilib is the index in the alphabetical order of the
+ scan file names.} of the scan image of the page.
+
+ \item[url] as an alternative to the \texttt{digilib} index number, the
+ full URL of the scan image of the page can be used.
+ \end{description}
+
+\item[chapter] describes a section or chapter of the text.
+ \texttt{chapter} elements can be nested.
+ \begin{description}
+ \item[name] the title of the chapter or section.
+
+ \item[start] the beginning of a page range (usually the first page
+ of the chapter). The \texttt{start} element has an optional
+ \texttt{increment} attribute to indicate the number of logical
+ pages on a scan image.\footnote{This information is only needed by
+ additional tools that try to generate lists of all page and
+ image numbers.}
+ \begin{description}
+ \item[name] the ``name'' of the first page (see \texttt{page}).
+
+ \item[index] the index of the first page (see \texttt{page}).
-\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
+ \item[url] the URL of the first page (see \texttt{page}).
+ \end{description}
+
+ \item[end] the end of a page range (usually the last page of the
+ chapter).
+ \begin{description}
+ \item[name] the ``name'' of the last page (see \texttt{page}).
-\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TSlise/lise_downloads/deimel1929.xml}
+ \item[index] the index of the last page (see \texttt{page}).
+
+ \item[url] the URL of the last page (see \texttt{page}).
+ \end{description}
+
+ \item[page] as an alternative (or in addition) to
+ \texttt{start}/\texttt{end} page ranges, single \texttt{page}
+ elements can be used inside \texttt{chapter}.
+ \end{description}
+\end{description}
+
+%%\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
\subsection{Information on scanned images}
@@ -250,33 +612,32 @@ reasons then the restrictions can be put
inside the container has to be further specified.
-\section{Sample metadata file for an ECHO resource}
-
-The following is the sample structure for a scanned document resource.
+\section{Sample metadata files for ECHO resources}
+The following is a sample structure for a scanned document.
\begin{verbatim}
-
+ Fleck, 1980fleck.1980University of Bern
- ubern/wiss-theoriescanned imagesecho23a45e2329x
+ gerFleck, Ludwik1980Entstehung und Entwicklung einer
wissenschaftlichen Tatsache
-
-
-
-
+
+
+
+ Frankfurt am MainSuhrkamp
-
+ Wissenschaftstheorie, Fleck, Tatsache
@@ -286,12 +647,47 @@ The following is the sample structure fo
Scanned images (300dpi)img
-
-
\end{verbatim}
+The following is a sample metadata structure for an architectural
+drawing.
+
+\begin{verbatim}
+
+ Bibliotheca Hertziana
+ scanned images
+
+ 00000271-asl-160-r-full.tif
+
+
+ 315
+
+ echo45a67bc4367d
+ ita
+
+ Ciolli, Giacomo
+ Urban VIII; Barberini, Maffeo
+ Accademia di San Luca
+ Roma
+ 1706
+
+
+
+
+
+
+
+ http://colosseum.biblhertz.it:8080/Lineamenta/
+ 1033478408.39/1035196181.35/1035196204.09/1035394121.83
+
+
+
+
+
+\end{verbatim}
+
\end{document}
%%% Local Variables: