--- storage/meta/meta-format.tex 2003/06/25 10:53:35 1.2
+++ storage/meta/meta-format.tex 2011/08/01 10:36:19 1.30
@@ -3,10 +3,10 @@
\usepackage[latin1]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{ae}
-%\usepackage{times}
-%\usepackage{courier}
+\usepackage{times}
+\usepackage{courier}
-% create in-text links black (with PDF)
+% create in-text links in black (with PDF)
\usepackage[colorlinks=true,linkcolor=black]{hyperref}
% Format URLs nicely (without PDF)
%\usepackage{url}
@@ -14,9 +14,9 @@
\title{A simple metadata format for resource bundles}
-\author{Robert Casties, Dirk Wintergrün, Christoph Liess}
+\author{Robert Casties, Dirk Wintergrün, Hans-Christoph Liess}
-\date{V0.2.1 of \today}
+\date{V1.4.0 of 1.8.2011}
\begin{document}
@@ -32,8 +32,48 @@ File and directory names should not cont
in filenames are only the alphanumeric set a-z, A-Z, 0-9, hyphen
``-'', underscore ``\_'' and dot ``.''.
-File and directory paths in the metadata file use the conventional
-Unix file separator slash ``/''.
+Files and directories with names that contain illegal characters must
+be transformed to allowed names. A proposition for a simple
+transformation rule is
+
+\begin{itemize}
+\item whitespace characters (e.g. blank, tab, cr, lf) are replaced by
+ hyphens ``-''
+
+\item other illegal characters are replaced by underscores ``\_''.
+\end{itemize}
+
+This rule does not provide a reversible mapping to the original
+illegal file name and it does not provide a collision-free mapping,
+i.e. two different illegal file names might be mapped to the same
+allowed file name. Additional precautions for these cases must be
+taken.
+
+
+\section{Metadata files}
+\label{sec:metadata-files}
+
+The metadata information is stored in the XML format documented below
+in special files in the resource directory. Two forms of metadata
+files are possible:
+\begin{itemize}
+\item a file named \texttt{index.meta} in a directory.
+
+\item a file with the same name as the data file it describes and an
+ additional extension \texttt{.meta}. For example metadata for the
+ file \texttt{p0001.tif} would be in a file \texttt{p0001.tif.meta}.
+\end{itemize}
+
+The resource directory must contain an \texttt{index.meta} file with
+information about the resource as a whole. Subdirectories can
+contain additional \texttt{index.meta} files.
+
+Additional information about single data files that are part of the
+resource can either be put in \texttt{file} tags in the
+\texttt{index.meta} file or in separate \emph{filename}\texttt{.meta}
+files for each data file. Information from the directory level file is
+inherited at the file level when it is not overwritten.
+
\section{Resource format}
\label{sec:mpiwg-doc}
@@ -43,36 +83,75 @@ by the provider of the resource and may
the metadata file. Elements marked ``required'' must be supplied by
the provider of the resource. Elements marked ``deduced'' can be
supplied by the provider of the resource but can also be provided by
-automatic scripts later in the process, the elements must be present
+automatic scripts later in the process; these elements must be present
in the final file.
-The outer container is named \texttt{resource}. Sub-types (``ECHO'',
-``MPIWG'') can be specified if necessary with a \texttt{type}
-parameter. Its sub-elements are:
+File and directory paths in the metadata file use the conventional
+Unix file separator slash ``/''.
+
+The outer container element is \texttt{resource}. It has the following
+\textbf{attributes}:
\begin{description}
-\item[description] An informal textual description of the
- resource -- optional.
+\item[type] sub-type of resource (e.g. ``ECHO'', ``MPIWG'') --
+ optional.
+
+\item[version] version number of metadata format (currently 1.2) --
+ required.
+\end{description}
+
+\noindent The allowed \textbf{elements} inside \texttt{resource} are:
+
+\begin{description}
+\item[description] An informal textual description of the resource --
+ optional\footnote{At least one description of the resource's content
+ is required. The description can be an informal
+ \texttt{description} element or a descriptive element (like
+ \texttt{bib}) in a \texttt{meta} container.}.
\item[name] The filename of the resource (name of the directory this
file is contained in) -- required.
\item[creator] The name of the project or person that created the
resource -- optional.
+
+\item[archive-creation-date] The time and date the archive collection
+ was created -- deduced.
-\item[archive-creation-date] The time and date the archive was created
- -- deduced.
+\item[archive-storage-date] The time and date the archive was written
+ to permanent storage -- deduced (must not be set by the user).
\item[archive-path] The full path to the resource directory inside the
- whole archive collection -- deduced.
+ whole archive collection, including the resource directory -- deduced.
+
+\item[archive-id] The ID for this document in the archive --
+ optional.
\item[derived-from] Container for the description of the original
resource if this resource is a modified version of another resource
-- optional.
\begin{description}
+ \item[archive-id] The ID of the original resource
+ -- required (or archive-path).
+
\item[archive-path] The full path to the original resource
- --required.
+ -- required (or archive-id).
+
+ \item[description] An informal textual description of the relation
+ of this resource to the original resource -- optional.
+ \end{description}
+
+\item[used-by] Container for the description of modified resources
+ if this resource is the source of another resource
+ -- optional.
+
+ \begin{description}
+ \item[archive-id] The ID of the derived resource
+ -- required (or archive-path).
+
+ \item[archive-path] The full path to the derived resource
+ -- required (or archive-id).
\item[description] An informal textual description of the relation
of this resource to the original resource -- optional.
@@ -83,21 +162,45 @@ parameter. Its sub-elements are:
-- optional.
\begin{description}
+ \item[archive-id] The ID of the linked resource
+ -- required (or archive-path).
+
\item[archive-path] The full path to the linked resource
- --required.
+ -- required (or archive-id).
\item[description] An informal textual description of the relation
of this resource to the linked resource -- optional.
\end{description}
-\item[content-type] The content type of this resource -- required.\\
- The content type enables the choice of tools to manipulate and
- display the resource. There should be a common list of content
- types. For digital documents (books, manuscripts) this would be
- "scanned document", for other image data "scanned
- images".\footnote{The criterion for documents is a ordered
- succession of image files (pages) and equal image size and
- resolution throughout the images of a resource.}
+\item[is-part-of] Container for the description of another resource if this
+ resource is a part of the other resource. -- optional. It can have a
+ \texttt{type} attribute describing the type of relation, e.g. ``manuscript-codex''.
+
+ \begin{description}
+ \item[archive-id] The ID of the other resource
+ -- required (or archive-path).
+
+ \item[archive-path] The full path to the other resource
+ -- required (or archive-id).
+
+ \item[description] An informal textual description of the relation
+ of this resource to the other resource -- optional.
+ \end{description}
+
+\item[media-type] \label{tag-media-type} The main media type of this
+ resource -- required.\\ The main media type can be overridden by
+ \texttt{media-type}s in subdirectories. Possible types are
+ \begin{itemize}
+ \item \texttt{image}
+
+ \item \texttt{text}
+
+ \item \texttt{audio}
+
+ \item \texttt{video}
+
+ \item \texttt{data} for other types of data
+ \end{itemize}
\item[meta] Additional metadata information about the resource --
optional.\\ For a description of additional metadata see below.
@@ -113,10 +216,16 @@ parameter. Its sub-elements are:
\item[name] The name of the subdirectory -- required.
+ \item[original-name] A text string associated with the directory as
+ original name -- optional. (E.g. if the data in this directory
+ came from an external source and had a name that had to be changed
+ according to section~\ref{sec:file-directory-names} but it should
+ be possible to reference the original name.)
+
\item[path] The directory path of this subdirectory relative to the
- resource's root directory -- required (may be identical to
- \texttt{name} or omitted if the directory is a direct child of the
- resource's root directory).
+ resource's root directory (excluding the directory itself) --
+ required (may be empty or omitted if the directory is a direct
+ child of the resource's root directory).
\item[meta] Additional metadata information about the directory --
optional.\\ For a description of additional metadata see below.
@@ -132,18 +241,25 @@ parameter. Its sub-elements are:
\item[name] The name of the file -- required.
+ \item[original-name] A text string associated with the file as
+ original name -- optional. (e.g. if this file came from an
+ external source and had a name that had to be changed according to
+ section~\ref{sec:file-directory-names} it is possible
+ to preserve the original name.)
+
\item[path] The directory path of this file relative to the
- resource's root directory -- required (may be identical to
- \texttt{name} or omitted if the file is in resource's root
+ resource's root directory (excluding the file itself) -- required
+ (may be empty or omitted if the file is in the resource's root
directory).
+
+ \item[date] The file's modification or creation date\footnote{The
+ preferred time and date format is ``YYYY/MM/DD HH:MM:SS''},
+ whichever is more recent -- optional.
\item[modification-date] The file's modification date -- optional.
\item[creation-date] The file's creation date -- optional.
-
- \item[date] The file's creation date if is has not been modified --
- optional.
-
+
\item[size] The file size -- deduced.
\item[mime-type] The file's mime-type -- optional.
@@ -162,45 +278,611 @@ parameter. Its sub-elements are:
\label{sec:additional-metadata}
All elements with \texttt{meta} tags can contain an arbitrary number
-of additional metadata elements.
+of the following additional metadata elements.
+
+\subsection{Workflow state}
+\label{sec:workflow-state}
+
+All additional metadata elements can have a \texttt{workflow-state}
+\textbf{attribute}. This attribute reflects the state of the
+corresponding metadata element. The possible values for the
+\texttt{workflow-state} attribute are
+\begin{itemize}
+\item \texttt{preliminary} this information is preliminary. It must
+ be checked in further workflow steps.
+
+\item \texttt{inwork}
+
+\item \texttt{final}
+\end{itemize}
+
+Workflow states other than \texttt{preliminary} are part of the
+workflow handling of the respective projects.
+
+Metadata elements can appear multiple times with different
+\texttt{workflow-state} attributes. This enables metadata versioning.
+
+
+
+\subsection{Content type}
+\label{sec:content-type}
+
+\begin{description}
+\item[content-type] \label{tag-content-type} The content type of this
+ resource -- required.\\
+ The content type enables the choice of tools to manipulate and
+ display the resource. There should be a common list of content
+ types. For digital documents (books, manuscripts) this would be
+ ``scanned document'', for other image data ``scanned
+ images''.\footnote{The criterion for documents is an ordered
+ succession of image files (pages) and equal image size and
+ resolution throughout the images of a resource.}
+\end{description}
+
+
+
+\subsection{Language}
+\label{sec:lang}
+
+The language of a resource (e.g. a text) can be specified with a
+\texttt{lang} tag. Languages have to be described using the
+international codes for the representation of names of languages
+either in two-letter form (ISO 639-1) or in three-letter form (ISO
+639-2). The entire catalogue of languages is documented on the page
+
+\url{http://www.loc.gov/standards/iso639-2/englangn.html}
\subsection{DRI}
\label{sec:dri}
The \emph{digital resource identifier} for the resource is specified
-with a \texttt{dri} tag. Digital resource identifiers are documented
+in a \texttt{dri} element. Digital resource identifiers are documented
on the page
\url{http://pythia.mpiwg-berlin.mpg.de/projects/standards/dri}.
+
+\subsection{Collection context}
+\label{sec:collection-context}
+
+The context of a resource as part of a collection or part of a project
+can be specified in the \texttt{context} element. The context element
+can appear multiple times if the resource is part of multiple
+collections or projects.
+
+\begin{description}
+\item[context] information on collection or project context.
+
+ \begin{description}
+ \item[link] URL to additional context information -- optional.
+
+ \item[name] Textual description of project or collection -- optional.
+
+ \item[meta-datalink] description of external sources of canonical meta
+ information -- optional
+ \begin{description}
+ \item[db] \textbf{attribute} to identify different sets of meta data
+ links to the same resource -- optional
+
+ \item[object] \textbf{attribute} to identify different objects or
+ parts of the same resource -- optional
+
+ \item[label] textual label for the link -- optional
+
+ \item[url] URL to present to the client -- optional
+
+ \item[metadata-url] URL to an external server to be queried -- optional
+ \end{description}
+
+ \item[meta-baselink] description of external server for canonical meta
+ information -- optional
+ \begin{description}
+ \item[db] \textbf{attribute} to identify different sets of meta data
+ links to the same resource -- optional
+
+ \item[label] textual label for the link -- optional
+
+ \item[url] URL to present to the client -- optional
+
+ \item[metadata-url] URL to an external server to be queried --
+ required (the parameter \texttt{object=} with an object id has
+ to be appended to this URL)
+ \end{description}
+ \end{description}
+\end{description}
+
+
+
+
\subsection{Bibliographic information}
\label{sec:bibliographic-data}
-Bibliographic information in the format of the ECHO scheme for
-bibliographic data (cf. content workflow) or the MPIWG
-``Projektbibliografie'' is presented in a \texttt{bib} container with
+Bibliographic information is presented in a \texttt{bib} container with
a \texttt{type} parameter, giving the type of bibliographic resource.
-The \texttt{type} field is repeated as a tag in the container. The
-tags have the variable ``human-readable'' field names.
+The \texttt{type} field can be repeated as a tag in the container.
+
+The format is based on the ECHO scheme for bibliographic data (cf.
+content workflow), the MPIWG ``Projektbibliografie'' and the format of
+the commonly used program ``EndNote''.
+
+
+\subsubsection{Book}
+
+\begin{description}
+
+\item [bib type="book"] a published book.
+
+ \begin{description}
+ \item [author] The author of the book.
+ \item [year] The year of publication.
+ \item [title] Title of the book.
+ \item [series-editor] Name of the series editor, if the book appears
+ in a series.
+ \item [series-title] Title of the series, if the book appears in a
+ series.
+ \item [series-volume] Volume number, if the book appears in a
+ series.
+ \item [number-of-pages] Number of pages of the entire book.
+ \item [city] City where the book was published.
+ \item [publisher] Name of the publishing company
+ \item [edition] Edition of the book (e.g. third edition)
+ \item [number-of-volumes] Number of volumes, if the book is
+ published in multiple volumes.
+ \item [translator] Name of the translator.
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{In Book}
+
+\begin{description}
+\item [bib type="inbook"] an article as part of a book.
+
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [editor] Name of the book's editor.
+ \item [book-title] Title of the book.
+ \item [series-volume] Volume number, if the book appears in a
+ series.
+ \item [pages] Number of pages of the article.
+ \item [city] City where the book was published.
+ \item [publisher] Name of the publishing company
+ \item [edition] Edition of the book (e. g. third edition)
+ \item [series-author] Name of the series editor, if the book appears
+ in a series.
+ \item [series-title] Title of the series, if the book appears in a
+ series.
+ \item [number-of-volumes] Number of volumes, if the book is
+ published in multiple volumes.
+ \item [translator] Name of the translator
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Proceedings}
+
+\begin{description}
+\item [bib type="proceedings"] a conference proceedings publication.
+
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [editor] Name of the book's editor.
+ \item [conference-name] Name of the conference the proceedings are
+ related to.
+ \item [volume] Volume number.
+ \item [pages] Number of pages of the article.
+ \item [date] Date of the conference the proceedings are related to.
+ \item [conference-location] City where the conference was held.
+ \item [publisher] Name of the publishing company
+ \item [edition] Edition of the book (e. g. third edition)
+ \item [series-editor] Name of the series editor, if the book appears
+ in a series.
+ \item [series-title] Title of the series, if the book appears in a
+ series.
+ \item [number-of-volumes] Number of volumes, if the book is
+ published as multiple volumes.
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Edited Book}
+
+\begin{description}
+\item[bib type="edited-book"] a book that is the edition of another
+ work.
+
+ \begin{description}
+ \item [editor] Name of the editor of the book.
+ \item [year] The year of publication.
+ \item [title] Title of the book.
+ \item [series-editor] Name of the editor of the series the book is
+ part of.
+ \item [series-title] Title of the series, if the book is part of a
+ series.
+ \item [series-volume] Volume number, if the book appears in a series.
+ \item [number-of-pages] Number of pages of the book.
+ \item [city] City where the book was published.
+ \item [publisher] Name of the publishing company
+ \item [edition] Information about the edition (e.g. ``Repr. of the London ed. 1652'')
+ \item [number-of-volumes] Number of volumes, if the book is
+ published as multiple volumes.
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Journal Volume}
+
+\begin{description}
+\item [bib type="journal-volume"] a volume of a scientific journal.
+ \begin{description}
+ \item [title] Name of the journal.
+ \item [editor] The editor of the journal.
+ \item [publisher] Name of the publishing company.
+ \item [city] City where the journal is published.
+ \item [year] The year of publication.
+ \item [volume] Volume number.
+ \item [number-of-pages] Number of pages of the volume.
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Journal Article}
+
+\begin{description}
+\item [bib type="journal-article"] an article in a scientific journal.
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [journal] Name of the journal.
+ \item [volume] Volume number, if the journal appears in a series.
+ \item [issue] Number of the issue the article is part of.
+ \item [pages] Number of pages of the article.
+ \item [alternate-journal] Alternate Journal
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Magazine Article}
+
+\begin{description}
+\item [bib type="magazine-article"] an article in a popular magazine.
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [magazine] Name of the magazine.
+ \item [volume] Volume number, if the book appears in a series.
+ \item [issue-number] Number of the issue the article is part of.
+ \item [pages] Number of pages of the article.
+ \item [date] Date when the article appeared.
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Newspaper Article}
+
+\begin{description}
+\item [bib type="newspaper-article"] an article in a newspaper.
+ \begin{description}
+ \item [author] The author of the article.
+ \item [year] The year of publication.
+ \item [title] Title of the article.
+ \item [newspaper] Name of the newspaper the article appeared in.
+ \item [pages] Number of pages of the article.
+ \item [issue-date] Date of the issue the article is part of.
+ \item [city] City of the newspaper.
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Thesis}
+
+\begin{description}
+\item [bib type="thesis"] a master/doctorate/etc. thesis.
+ \begin{description}
+ \item [author] The author of the thesis.
+ \item [year] The year of publication.
+ \item [title] Title of the thesis.
+ \item [academic-department] Name of the academic department where
+ the thesis was handed in.
+ \item [number-of-pages] Number of pages of the thesis.
+ \item [city] City where the thesis was published.
+ \item [university] Name of the university where the thesis was
+ handed in.
+ \item [isbn-issn]
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Report}
+
+\begin{description}
+\item [bib type="report"] a scientific report.
+ \begin{description}
+ \item [author] The author of the report.
+ \item [year] The year of publication.
+ \item [title] Title of the report.
+ \item [pages] Number of pages of the report.
+ \item [date] Date when the report appeared.
+ \item [city] City where the report was published.
+ \item [institution] Institution where the report was produced.
+ \item [type] Type of report.
+ \item [report-number] Report number.
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Manuscript}
+
+\begin{description}
+\item [bib type="manuscript"] a handwritten/typewritten manuscript.
+
+ \begin{description}
+ \item [title] Title of the manuscript.
+ \item [author] The author of the text.
+ \item [location] Name of the library where the manuscript is
+ currently located.
+ \item [year] The year or century of publication.
+ \item [pages] Number of pages of the manuscript.
+ \item [signature] Signature of the manuscript.
+ \item [editorial-remarks] Remarks related to the online
+ publication of the manuscript. This could be notes about
+ annotations etc.
+ \item [description] This can be any kind of description.
+ \item [keywords] Keywords related to the manuscript.
+ \item[call-number] Call number in holding library
+ \item[holding-library] Holding library
+ \end{description}
+\end{description}
+
+\subsubsection{Extended Manuscript}
+
+\begin{description}
+\item [bib type="extended-manuscript"] a handwritten/typewritten manuscript
+with detailed information about the manuscript's appearance.
+
+ \begin{description}
+ \item [title] Title of the manuscript.
+ \item [author] The author of the text.
+ \item[holding-library] Holding library.
+ \item[call-number] Call number/Shelf mark in holding library.
+ \item[location] Place/City/Country where the manuscript is
+ currently located.
+ \item[date calendar="type"] The date of publication, with an attribute
+ stating which calendar is used. If no attribute is given, CE is the
+ default. Can also be descriptive.
+ \item[year calendar="type"] Approximate year or century.
+ \item[number-of-folios] Number of folios/pages of the manuscript.
+ \item[signature] Signature(s) of the manuscript, under which a manuscript is
+ known.
+ \item[abstract] Interpretative abstract of the text's content.
+ \item[incipit] Incipit (beginning of text).
+ \item[explicit] Explicit (end of text).
+ \item[contents] Formal description of the text structure (e.g. table of
+ contents).
+ \item[writing-surface] material of the writing surface (e.g. ``non-european
+ paper'', ``palm leaf'',\ldots)
+ \item[foliation] Text giving list or range of folios.
+ \item[page-dimensions] height and width in cm.
+ \item[written-area-dimensions] height and width in cm.
+ \item[lines-per-page] number of lines and columns.
+ \item[catchwords] Quire signatures and catchwords.
+ \item[scripts] Description of the script and the ink used.
+ \item[copyist] Copyist.
+ \item[collation-corrections] Notes on collation and corrections.
+ \item[binding] Description of binding.
+ \item[notes-on-ownership] Notes on ownership.
+ \item[notes] Additional notes.
+ \item[secondary-literature] Notes on secondary literature related to the
+ manuscript
+ \item [editorial-remarks] Remarks related to the online
+ publication of the manuscript.
+ \item [keywords] Keywords related to the manuscript.
+ \end{description}
+\end{description}
+
+\subsubsection{Codex}
+
+\begin{description}
+\item [bib type="codex"] Codex i.e. bound collection of one or more manuscripts.
+
+ \begin{description}
+%TODO: do we need collection information?
+ \item[holding-library] Holding library.
+ \item[call-number] Call number/Shelf mark in holding library.
+ \item[location] Place/City/Country where the codex is
+ currently located.
+ \item[date calendar="type"] Date of the collation of the codex.
+ \item[year calendar="type"] Approximate year or century.
+ \item[number-of-folios] Number of folios/pages of the manuscript.
+ \item[foliation] Text giving list or range of folios.
+ \item[signature] Signature(s) of the manuscript, under which a manuscript is
+ known.
+ \item[contents] Formal description of the text structure (e.g. table of
+ contents).
+ \item[dimensions] height + width in cm.
+ \item[binding] Description of binding.
+ \item[notes] Additional notes.
+ \item[notes-on-ownership] Notes on ownership.
+ \end{description}
+\end{description}
+
+
+\subsubsection{Correspondence}
+
+\begin{description}
+\item [bib type="correspondence"] a piece of correspondence e.g. letter, telegram, in the following called ``letter''
+
+ \begin{description}
+ \item[type] The type of correspondence, e.g. ``letter'', ``postcard'', ``telegram'', ``letter draft''
+ \item [author] The author/sender of the letter.
+ \item [recipient] The recipient of the letter.
+ \item [date] normalised date of the letter.
+ \item [date-range-end] end of range of uncertain dating -- optional.
+ \item [date-original] the date in its original form as noted on the letter -- optional.
+ \item [place] place where the letter was written/sent.
+ \item [title] Title of the letter -- optional.
+ \item[incipit] The opening phrase of the letter -- optional.
+ \item[explicit] The closing phrase of the letter -- optional.
+ \item [pages] Number of pages of the manuscript.
+ \item [signature] Canonical signature/call number of the manuscript.
+ \item [description] This can be any kind of description.
+ \item [keywords] Keywords related to the manuscript.
+ \item[call-number] Call number in the current holding library
+ \item[holding-library] current holding library
+ \end{description}
+\end{description}
-\subsection{Information on the document structure (table of contents)}
+\subsubsection{Generic}
+
+\begin{description}
+\item [bib type="generic"] a generic bibliographic type. This type
+ should only be used in rare cases.
+ \begin{description}
+ \item [author]
+ \item [year]
+ \item [title]
+ \item [secondary-author]
+ \item [secondary-title]
+ \item [volume]
+ \item [number]
+ \item [pages]
+ \item [date]
+ \item [place-published]
+ \item [publisher]
+ \item [edition]
+ \item [tertiary-author]
+ \item [tertiary-title]
+ \item [number-of-volumes]
+ \item [type-of-work]
+ \item [subsidiary-author]
+ \item [alternate-title]
+ \item [isbn-issn]
+ \item [call-number]
+ \item [label]
+ \item [keywords]
+ \item [abstract]
+ \item [notes]
+ \item [url]
+ \end{description}
+\end{description}
+
+
+\subsection{Document structure (table of contents)}
\label{sec:toc}
-Document structure information like a table of contents for a scanned
-document is presented in a \texttt{toc} container. The format to be
-used has to be further specified. The format could be based on the so
-called ``LiSe-XML'' format. For a detailed description and an
-exemplary set of TOC information see:
+Information on the structure of a document like the division into
+parts and chapters in the way of a table of contents is presented in a
+\texttt{toc} container.
+
+This scheme allows multiple logical pages on a single page image
+as it is often the case with scanned books or manuscripts. The scheme
+also allows for ``loose'' numbering schemes with roman, arabic or
+other page numbers consecutively or mixed and changes in the numbering
+within the document.
+
+The flexibility comes from the fact that no additional assumptions
+about the mapping between logical pages and page images are made in
+the format. All mapping information is specified by the user.
+
+The logical page numbering or naming that can be presented to the user
+is specified in the \texttt{name} tags while the physical numbering of
+the page images is specified in the \texttt{index} or \texttt{url}
+tags.
+
+\begin{description}
+\item[toc] container for document structure
+
+ \begin{description}
+ \item[page] describes a single logical page
-\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
+ \begin{description}
+ \item[name] the ``name'' of the logical page. This can be any string
+ like a page number (arabic, roman, etc.) or a special designation
+ like ``Table 5''.
+
+ \item[index] the \texttt{digilib} index number\footnote{The index
+ number for digilib is the index in the alphabetical ordering of the
+ scan file names.} of the scan image of the page.
+
+ \item[file] alternatively the file name (preferably without extension) of the
+ scan image of the page.
+
+ \item[url] alternatively the full URL of the scan image of the
+ page can be used.
+ \end{description}
+
+ \item[chapter] describes a section or chapter of the text.
+ \texttt{chapter} elements can be nested.
-\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TSlise/lise_downloads/deimel1929.xml}
+ \begin{description}
+ \item[name] the title of the chapter or section.
+
+ \item[start] the beginning of a page range (usually the first page
+ of the chapter). The \texttt{start} element has an optional
+ \texttt{increment} attribute to indicate the number of logical
+ pages on a scan image.\footnote{This information is only needed by
+ additional tools that try to generate lists of all page and
+ image numbers.}
+
+ \begin{description}
+ \item[name] the ``name'' of the first page (see \texttt{page}).
+
+ \item[index] the index of the first page (see \texttt{page}).
+
+ \item[file] the file name of the first page (see \texttt{page}).
+
+ \item[url] the URL of the first page (see \texttt{page}).
+ \end{description}
+
+ \item[end] the end of a page range (usually the last page of the
+ chapter).
+
+ \begin{description}
+ \item[name] the ``name'' of the last page (see \texttt{page}).
+
+ \item[index] the index of the last page (see \texttt{page}).
+
+ \item[file] the file name of the last page (see \texttt{page}).
+
+ \item[url] the URL of the last page (see \texttt{page}).
+ \end{description}
+
+ \item[page] as an alternative (or in addition) to
+ \texttt{start}/\texttt{end} page ranges, single \texttt{page}
+ elements can be used inside \texttt{chapter}.
+ \end{description}
+ \end{description}
+\end{description}
+%%\url{http://pythia.mpiwg-berlin.mpg.de/toolserver/TS_lise}
-\subsection{Information on scanned images}
+
+\subsection{Digital images}
\label{sec:inform-scann-imag}
Image files representing scanned images can have an \texttt{img}
@@ -211,86 +893,439 @@ of the original image. This information
Required is one of three possible sets of tags:
\begin{description}
-\item[original-size-x] The width of the original image. The unit of
- measure can be contained as parameter \texttt{unit}, the default is
- meter ``m''. The width to be considered is the total width of the
- scanned area.
+\item[img] digital image information.
+
+ \begin{description}
+ \item[original-size-x] The width of the original
+ image -- required. \\
+ The unit of measure can be contained as parameter \texttt{unit},
+ the default is meter ``m''. The width to be considered is the
+ total width of the scanned area.
+
+ \item[original-size-y] The height of the original image -- required.
+
+ \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
+
+ \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
+ \end{description}
+\end{description}
+
+or
+
+\begin{description}
+\item[img] digital image information.
-\item[original-size-y] The height of the original image.
+ \begin{description}
+ \item[original-dpi-x] The resolution of the hi-res scan in its width
+ in pixels per inch -- required.
-\item[original-pixel-x] The width of the hi-res scan in pixels.
+ \item[original-dpi-y] The resolution of the hi-res scan in its height
+ in pixels per inch -- required.
-\item[original-pixel-y] The height of the hi-res scan in pixels.
+ \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
+
+ \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
+ \end{description}
\end{description}
or
\begin{description}
-\item[original-dpi-x] The resolution of the hi-res scan in its width
- in pixels per inch.
+\item[img] digital image information.
-\item[original-dpi-y] The resolution of the hi-res scan in its height
- in pixels per inch.
+ \begin{description}
+ \item[original-dpi] The resolution of the hi-res scan in pixels per
+ inch if the resolutions in width and height are the same -- required.
+
+ \item[original-pixel-x] The width of the hi-res scan in pixels -- deduced.
+
+ \item[original-pixel-y] The height of the hi-res scan in pixels -- deduced.
+ \end{description}
\end{description}
-or
+
+
+\subsection{Digital image acquisition}
+\label{sec:inform-about-image}
+
+A description of the technology used in the process of producing a
+digital image.
+
+\begin{description}
+\item[image-acquisition] description of the image production process
+ \begin{description}
+ \item[device] acquisition device (e.g. ``flatbed scanner'')
+
+ \item[image-type] type and color-depth of the image -- required (e.g. ``RGB 24
+ bit'')
+
+ \item[production-comment] additional textual information about the
+ production process
+ \end{description}
+\end{description}
+
+
+
+\subsection{Full text with images}
+\label{sec:full-text-with}
+
+The relation between a full text and optional images of
+whole pages or parts of pages must be specified in a
+\texttt{texttool} container.
+
+Full text in an XML format should be specified with a
+\texttt{content-type}\footnote{see section~\ref{tag-content-type}
+on page~\pageref{tag-content-type}} ``fulltext''.
+
\begin{description}
-\item[original-dpi] The resolution of the hi-res scan in pixels per
- inch if the resolutions in width and height are the same.
+\item[texttool] representation of full text with images
+
+ \begin{description}
+ \item[text] the file name of the full text file (path
+ inside document directory)
+
+ \item[text-url-path] a characteristic part of the URL with which the
+ full text can be retrieved (the form and content of this element
+ is dependent on the specific text retrieval mechanism)
+
+ \item[image] the directory name of the directory containing the
+ page image files (path inside document directory)
+
+ \item[figure] the directory name of the directory containing the
+ in-page figure image files (path inside document directory)
+
+ \item[page-flow] the reading direction of pages in this document:
+ either ``ltr'' (left-to-right, standard western page flow) or
+ ``rtl'' (right-to-left, still assuming increasing page numbers)
+
+ \item[odd-scan-orientation] the orientation of all odd-numbered scan
+ pages regardless of \texttt{page-flow}: either ``left'' (all
+ odd-numbered scans are left hand sides) or ``right'' (all
+ odd-numbered scans are right hand sides, i.e. the first left hand
+ side is scan number 2).
+
+ \item[title-scan-no] the index number of the scan of the title page or the
+ first text page
+
+ \item[xslt] the file name of an additional XSL transformation
+ file
+
+ \item[pagebreak] the name of the element that indicates page breaks
+ (default ``pb'')
+ \end{description}
\end{description}
-\subsection{Access restrictions}
-\label{sec:access-restrictions}
-If the access to a resource is restricted for technical or legal
-reasons then the restrictions can be put in a
-\texttt{access-restrictions} container. The format of the information
-inside the container has to be further specified.
+\subsection{Copyright and access conditions}
+\label{sec:access-conditions}
+
+If the access to a resource is bound to conditions for technical or legal
+reasons then the conditions can be put in an \texttt{access-conditions}
+container. Other usage conditions like copyright can also be
+documented in this container.
+
+\begin{description}
+\item[access-conditions] legal and technical conditions for access to
+ this resource. \\
+ The \texttt{attribution}, \texttt{copyright}, and \texttt{access}
+ tags can be repeated with different \texttt{resource} attributes if
+ different conditions apply to different parts of the whole resource.
+
+ \begin{description}
+ \item[attribution] The name or institution this resource should be
+ attributed to when it's publicly presented. \\
+ The kind of resource this condition applies to can be specified with a
+ \texttt{resource} attribute with the values ``original'' (the
+ physical object that was scanned), ``digital-image'' (the scanned
+ images), ``text'' (the textual transcript). \\
+ All tags inside can be repeated.
+
+ \begin{description}
+ \item[name] a name (free text)
+
+ \item[url] a URL (with an optional \texttt{label} attribute to show
+ as text)
+
+ \item[description] more information (free text, e.g. holding
+ library call number)
+ \end{description}
+
+ \item[copyright] the copyright holder and the copyright conditions. \\
+ The kind of resource this condition applies to can be specified with a
+ \texttt{resource} attribute with the values ``original'' (the physical object
+ that was scanned), ``digital-image'' (the scanned images), ``text''
+ (the textual transcript).
+
+ \begin{description}
+ \item[owner] the name of the copyright holder
+ \begin{description}
+ \item[name] a name (free text)
+
+ \item[url] a URL (with an optional \texttt{label} attribute to show
+ as text) identifying the copyright holder
+ \end{description}
+
+ \item[date] the date when the copyright was issued
+
+ \item[duration] the duration of the copyright term (if known)
+
+ \item[description] free-text field for special or additional
+ conditions
+ \item[license] the type of license if it's a standardised license e.g. Creative Commons
+ \begin{description}
+ \item[url] a URL representing the license e.g. \url{http://creativecommons.org/licenses/by/3.0/}
+ \end{description}
+
+ \end{description}
+
+
+ \item[access] conditions of access to this resource. Different
+ access types are specified by a \texttt{type} attribute.
+ The kind of resource this condition applies to can be specified with a
+ \texttt{resource} attribute with the values ``digital-image'' (the
+ scanned images), or ``text'' (the textual transcript).
+
+ \begin{description}
+ \item[type=group] access restricted to the members of this named
+ group. The method to identify a user belonging to a named group
+ is not specified in this document.
+ \begin{description}
+ \item[name] name of the group.
+
+ \item[only-before] the access condition is only valid before the
+ given date (format: ``YYYY/MM/DD'').
+
+ \item[only-after] the access condition is only valid after the
+ given date (format: ``YYYY/MM/DD'').
+ \end{description}
+
+ \item[type=institution] access restricted to the members of this
+ institution. The method to identify a user belonging to the
+ institution is not specified in this document.
+ \begin{description}
+ \item[name] name of the institution.
+
+ \item[only-before] the access condition is only valid before the
+ given date (format: ``YYYY/MM/DD'').
+
+ \item[only-after] the access condition is only valid after the
+ given date (format: ``YYYY/MM/DD'').
+ \end{description}
+
+ \item[type=subnet] access restricted to all computers with an
+ IP-address in this subnet.
+ \begin{description}
+ \item[range] subnet range defined in
+ truncated-quad (e.g. ``141.14''), network-netmask
+ (e.g. ``141.14.0.0/255.255.0.0''), or network-range
+ (e.g. ``141.14.0.0/16'') notation.
+
+ \item[only-before] the access condition is only valid before the
+ given date (format: ``YYYY/MM/DD'').
+
+ \item[only-after] the access condition is only valid after the
+ given date (format: ``YYYY/MM/DD'').
+ \end{description}
+
+
+ \item[type=scientific] access to this resource should be restricted to
+ scientific work
+ \begin{description}
+ \item[only-before] the access condition is only valid before the
+ given date (format: ``YYYY/MM/DD'').
+
+ \item[only-after] the access condition is only valid after the
+ given date (format: ``YYYY/MM/DD'').
+ \end{description}
+
-\section{Sample metadata file for an ECHO resource}
+ \item[type=free] access to this resource is not restricted
+ \begin{description}
+ \item[only-before] the access condition is only valid before the
+ given date (format: ``YYYY/MM/DD'').
+
+ \item[only-after] the access condition is only valid after the
+ given date (format: ``YYYY/MM/DD'').
+ \end{description}
+
+
+ \item[type=special] if none of the above conditions seems appropriate,
+ a free-form text can be specified here.
+ \begin{description}
+ \item[description] description of special access conditions.
+
+ \item[only-before] the access condition is only valid before the
+ given date (format: ``YYYY/MM/DD'').
+
+ \item[only-after] the access condition is only valid after the
+ given date (format: ``YYYY/MM/DD'').
+ \end{description}
+
+ \end{description}
+ \end{description}
+\end{description}
+
+\noindent
+It should be noted that control over access to the resource has to be
+provided by additional technical measures. Access conditions in the
+metadata file only state that conditions \emph{should} be observed; it
+is not implied that they \emph{are} necessarily observed, as the
+enforcement of conditions depends on additional measures.
+
+
+
+\subsection{Acquisition of raw-data}
+\label{sec:acqu-inform}
+
+Information about the acquisition source for raw data resources can be
+provided in an \texttt{acquisition} container.
+
+\begin{description}
+\item[acquisition] the acquisition source of this resource -- required
+ for raw data.
+ \begin{description}
+ \item[provider] where this resource came from -- required
+ \begin{description}
+ \item[name] free-text name of the provider (institution or
+ individual)
+
+ \item[address] address of the provider
+
+ \item[contact] contact person at the provider (i.e. name and email)
+
+ \item[url] URL related to the provider
+
+ \item[provider-id] id of the provider (internally used) -- deduced
+ \end{description}
+
+ \item[date] date of acquisition -- required
+
+ \item[description] free-text description of the acquisition source or
+ additional information
+ \end{description}
+\end{description}
+
+
+
+\subsection{Documentary Films}
+\label{sec:documentary-films}
+
+Documentary films can be described using a \texttt{film-acquisition}
+container.
+
+\begin{description}
+\item[film-acquisition] description of a (documentary) film --
+ required for documentary film
+ \begin{description}
+ \item[recording] specification of the recording process
+ \begin{description}
+ \item[author] the person or persons doing the recording
+
+ \item[date] the date or time span when the film was recorded
+
+ \item[location] the place where the film was recorded
+
+ \item[device] recording device used (e.g. ``Sony CP-DV8 Camcorder'')
+
+ \item[format] format of the recorded film -- required (e.g. ``DV
+ 720x524 25fps interlaced'')
+ \end{description}
+
+ \item[description] free-form description of the recording and the
+ content of the film
+ \end{description}
+\end{description}
+
+(More information about the digitization step could be added in a
+\texttt{digitization} tag similar to the \texttt{recording} tag.)
+
+
+
+
+\section{Sample metadata files for ECHO resources}
+
+The following is a sample metadata index file for a directory containing a
+scanned document.
+
+\begin{small}
+\begin{verbatim}
+
+ Fleck, 1980
+ fleck.1980
+ University of Bern
+ ubern/wiss-theorie
+ scanned images
+
+ echo23a45e2329x
+ ger
+
+ Fleck, Ludwik
+ 1980
+ Entstehung und Entwicklung einer
+ wissenschaftlichen Tatsache
+
+
+
+
+ Frankfurt am Main
+ Suhrkamp
+
+
+
+
+ Wissenschaftstheorie, Fleck, Tatsache
+
+
+
+
+ Scanned images (300dpi)
+ img
+
+
+\end{verbatim}
+\end{small}
-The following is the sample structure for a scanned document resource.
+The following is a sample metadata file for a single image of an
+architectural drawing.
+\begin{small}
\begin{verbatim}
-
-
- fleck.1980
- University of Bern
-
- ubern/wiss-theorie
- scanned images
+
+ Bibliotheca Hertziana
+ scanned images
+
+ 00000271-asl-160-r-full.tif
- echo23a45e2329x
-
- Fleck, Ludwik
- 1980
- Entstehung und Entwicklung einer
- wissenschaftlichen Tatsache
-
-
-
-
- Frankfurt am Main
- Suhrkamp
-
-
-
-
- Wissenschaftstheorie, Fleck, Tatsache
-
-
+
+ 315
+
+ echo45a67bc4367d
+ ita
+
+ Ciolli, Giacomo
+ Urban VIII; Barberini, Maffeo
+ Accademia di San Luca
+ Roma
+ 1706
+
+
+
+
+
+
+
+ http://colosseum.biblhertz.it:8080/Lineamenta/
+ 1033478408.39/1035196181.35/1035196204.09/1035394121.83
+
+
-
- Scanned images (300dpi)
- img
-
-
-
+
\end{verbatim}
+\end{small}
\end{document}