#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
# Time-stamp: <2019-09-13 15:17:52 (kthoden)>


# Module description. It is assigned to __doc__ explicitly instead of being
# written as a bare docstring at the top of the file.
__doc__="""This module provides eight functions that might be useful for some scripts, for example Filter_3_05_add_basic_xml.py."""

def get_metadata(echopath):
    """Download the index.meta file of a resource and parse its metadata.

    The URL is built as ``<repo><echopath>/index.meta``. The ``version``
    attribute of the root ``resource`` element decides which parser is
    used: ``parse_index_meta`` for "2.0", ``parse_old_index_meta`` for
    everything else (including a missing attribute).

    Args:
        echopath: Path of the resource below the repository root, e. g.
            /permanent/library/UR271U6Y.

    Returns:
        The metadata tuple built by the selected parser:
        (lang_list, title, date, author_list, rights, indexmeta2dot0,
        alt_title, dri).

    Raises:
        SystemExit: If the URL cannot be opened or the response is not
            well-formed XML.
    """
    import urllib
    import sys
    from lxml import etree
    import common_functions

    repo = "http://content.mpiwg-berlin.mpg.de/mpiwg/online/"
    # repo = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=/"
    fullpath = "%s%s/index.meta" % (repo, echopath)

    common_functions.consoleDebug("Trying to get metadata")

    # Get the file and read it
    try:
        usock = urllib.urlopen(fullpath)
    except IOError as err:
        # Read errno/strerror from the exception object instead of
        # unpacking it in the except clause: the exception is not
        # guaranteed to carry exactly two arguments, and unpacking would
        # then fail with a ValueError that hides the real problem.
        common_functions.printlnInERROR("I/O error ({0}): {1}".format(err.errno, err.strerror))
        # NOTE(review): sys.exit() without argument means exit status 0;
        # kept as it was so that calling scripts see no difference.
        sys.exit()

    try:
        indexmetaTree = etree.parse(usock)
    except etree.XMLSyntaxError:
        common_functions.printlnInERROR("Online resource %s not found. Keep in mind that you must specify the whole path, e. g. /permanent/library/UR271U6Y." % fullpath)
        sys.exit()
    finally:
        # Close the connection on success and on failure alike.
        usock.close()

    # first, we need to check the version (and hopefully, the version of index.meta has been written into the root tag)
    version = indexmetaTree.xpath('/resource/@version')

    if version and version[0] == "2.0":
        common_functions.consoleDebug("index.meta 2.0 found")
        return parse_index_meta(indexmetaTree)

    common_functions.consoleDebug("found an older index.meta")
    return parse_old_index_meta(indexmetaTree)
# def get_metadata ends here


def get_gnd(creator):
    """Look up the GND identifier of every creator named in *creator*.

    Several creators are expected to be delimited by '; '. Right now
    this is hard-coded, but it might not always be the case. The
    identifiers are read from share/texts/resources/gnd.json below the
    directory named by the environment variable ECHO_SCRIPTS_DIR; the
    file is used as a mapping from creator name to GND.

    Args:
        creator: String containing one or more creator names.

    Returns:
        A list with one (gnd, name) tuple per creator, in the order of
        the input. For a name that is missing from the JSON file, gnd is
        the string "GND not available".

    Raises:
        KeyError: If ECHO_SCRIPTS_DIR is not set in the environment.
    """

    import json
    import common_functions
    import codecs
    import os

    # use the json file; the with-block makes sure the handle is closed
    GND_FILE = (os.environ["ECHO_SCRIPTS_DIR"]+"/share/texts/resources/gnd.json")
    with codecs.open(GND_FILE, 'r', 'utf-8') as gnd_fh:
        gnd = json.load(gnd_fh)

    author_list = []

    # Loop through all the creators, find their gnd and put them in a
    # list. If the gnd is not there, we put that into the list.
    for name in creator.split("; "):
        # Only the lookup itself is guarded, so a KeyError raised
        # somewhere else is not mistaken for a missing database entry.
        try:
            gnd_id = gnd[name]
        except KeyError:
            common_functions.consoleWarn("GND of author (%s) is not in database." % name)
            author_list.append(("GND not available", name))
            continue

        message = 'Found GND of author (%s) in the database. If you want to check this entry in the DNB, please <a href="http://d-nb.info/gnd/%s" target="_blank"= >click here</a>.' % (name, gnd_id)
        print(message)
        common_functions.consoleStatus(message)
        author_list.append((gnd_id, name))
    return author_list
# def get_gnd ends here

def get_lang_list(language):
    """Return both forms of a language as a two-item list.

    The lookup table is share/texts/resources/iso639.json below the
    directory named by the environment variable ECHO_SCRIPTS_DIR. It is
    used as a mapping from two-letter code to the longer form.

    Args:
        language: A two-letter code, the longer form (three or more
            characters), or an empty string / None if nothing is known.

    Returns:
        [two_letter_code, longer_form]. Missing language information
        gives two empty entries (None is treated like an empty string).
        A one-character string matches neither form and gives an empty
        list.

    Raises:
        KeyError: If a two-letter code is not in the table, or if
            ECHO_SCRIPTS_DIR is not set.
        IndexError: If a longer form is not among the table's values.
    """
    import json
    import common_functions
    import codecs
    import os

    common_functions.consoleDebug("Language 1: %s!" % language)

    # Nothing to look up. None can arrive here when a language element
    # exists but has no text.
    if not language:
        placeholder = language if language is not None else ""
        return [placeholder, placeholder]

    # use the json file for looking up language codes; the with-block
    # makes sure the handle is closed
    ISO639_FILE = (os.environ["ECHO_SCRIPTS_DIR"]+"/share/texts/resources/iso639.json")
    with codecs.open(ISO639_FILE, 'r', 'utf-8') as iso_fh:
        iso639 = json.load(iso_fh)

    if len(language) == 2:
        return [language, iso639[language]]

    if len(language) >= 3:
        # The json dictionary should be searchable both ways: collect
        # the codes whose value equals the given longer form.
        codes = [code for code, longer in iso639.items() if longer == language]
        # Indexing raises IndexError for an unknown value, which is what
        # callers got before.
        return [codes[0], language]

    # NOTE(review): a single character is neither form; the empty list
    # is the behaviour callers have seen so far.
    return []
# def get_lang_list ends here

def parse_index_meta(indexmetaTree):
    """Function for parsing the 2.0 style of index.metas.

    Args:
        indexmetaTree: The parsed index.meta (an lxml tree that offers
            ``xpath``).

    Returns:
        The tuple (lang_list, title, date, author_list, rights,
        indexmeta2dot0, alt_title, dri):

        * lang_list: result of ``get_lang_list`` for the language found
        * title: text of the first title that is not of type "alternate"
        * date: text of the first bib/date element
        * author_list: one ``[name, id_type, id]`` list per author name;
          "no ID type" / "no ID" stand in for a missing identifier
        * rights: type attribute of access-conditions/access
        * indexmeta2dot0: always True here
        * alt_title: text of the first alternate title, or "empty"
        * dri: text of the dri element of type "mpiwg"

    Raises:
        IndexError: If main title, date, access type or mpiwg dri are
            missing.
    """
    import common_functions

    common_functions.consoleDebug("We are hypermodern. This is index.meta 2.0 with lxml!")

    indexmeta2dot0 = True

    # title. Assume (not good) that the main title is also the first one
    alltitles = indexmetaTree.xpath('/resource/meta/bib/title[not(@type="alternate")]')
    if len(alltitles) > 1:
        common_functions.consoleWarn("There are %d different titles! Using first one." % len(alltitles))
    title = alltitles[0].text

    # alternative titles
    alternative_titles = indexmetaTree.xpath('/resource/meta/bib/title[@type="alternate"]')
    common_functions.consoleDebug("Number of alternate titles: %s" % len(alternative_titles))
    if len(alternative_titles) > 1:
        common_functions.consoleWarn("There are %d different alternate titles! Using first one." % len(alternative_titles))
    alt_title = alternative_titles[0].text if alternative_titles else "empty"

    # authors: an item per author, containing name, type of id and id itself
    allauthors = indexmetaTree.xpath('/resource/meta/bib/person[@role="author"]/name')
    author_list = []
    for name in allauthors:
        # Ask the person element that owns this name for its identifier.
        # Counting persons by position can drift away from the list of
        # names, for example when a person has no name element.
        person = name.getparent()

        id_types = person.xpath('identifier/@type')
        if id_types:
            id_type = id_types[0]
        else:
            id_type = "no ID type"
            common_functions.consoleDebug("No identifier type given for author %s" % name.text)

        identifiers = person.xpath('identifier')
        if identifiers:
            id_value = identifiers[0].text
        else:
            id_value = "no ID"
            common_functions.consoleDebug("No identifier given for author %s" % name.text)

        author_list.append([name.text, id_type, id_value])

    # language: prefer /resource/lang, then /resource/meta/lang, and as a
    # last resort the lang attribute of a title
    lang_nodes = (indexmetaTree.xpath('/resource/lang')
                  or indexmetaTree.xpath('/resource/meta/lang'))
    if lang_nodes:
        lang = lang_nodes[0].text
    else:
        title_langs = indexmetaTree.xpath('/resource/meta/bib/title/@lang')
        if title_langs:
            lang = title_langs[0]
        else:
            common_functions.consoleWarn('Could not find any language information.')
            lang = ""
    # produces a list containing both 2 and 3 (or more) letter codes
    lang_list = get_lang_list(lang)

    # date
    date = indexmetaTree.xpath('/resource/meta/bib/date')[0].text
    # rights
    rights = indexmetaTree.xpath('/resource/meta/access-conditions/access/@type')[0]
    # dri identifier 	<dri type="mpiwg">MPIWG:WBGMR64C</dri></meta>
    dri = indexmetaTree.xpath('/resource/meta/dri[@type="mpiwg"]')[0].text

    # export this list which can then be inserted anyway you want
    metadata_list = (lang_list, title, date, author_list, rights, indexmeta2dot0, alt_title, dri)
    return metadata_list
# def parse_index_meta ends here

def parse_old_index_meta(indexmetaTree):
    """Function for parsing the 1.2 style of index.metas. Or version 2.0 without it being stated.

    Args:
        indexmetaTree: The parsed index.meta (an lxml tree that offers
            ``xpath``).

    Returns:
        The tuple (lang_list, title, date, author_list, rights,
        indexmeta2dot0, alt_title, dri). indexmeta2dot0 is always False
        and alt_title always "empty" here; author_list comes from
        ``get_gnd`` and lang_list from ``get_lang_list``.

    Raises:
        IndexError: If title, author, language, year, access type or
            mpiwg dri are missing.
    """

    import common_functions

    common_functions.consoleDebug('This seems to be an older index.meta. If not, please put version="2.0" in the root tag.')
    indexmeta2dot0 = False

    # title
    title = indexmetaTree.xpath('/resource/meta/bib/title')[0].text
    # just in case
    alt_title = "empty"
    # author. What if there are several ones?
    author = indexmetaTree.xpath('/resource/meta/bib/author')[0].text

    # language
    # lang should be outside of bib, but this not necessarily so.
    # Both places have to assign lang: leaving it unset when the tag is
    # in resource/meta made the get_lang_list call below fail.
    meta_lang = indexmetaTree.xpath('/resource/meta/lang')
    if meta_lang:
        lang = meta_lang[0].text
    else:
        common_functions.consoleDebug('No lang tag found in resource/meta.')
        lang = indexmetaTree.xpath('/resource/meta/bib/lang')[0].text
    lang_list = get_lang_list(lang)

    # date
    date = indexmetaTree.xpath('/resource/meta/bib/year')[0].text
    # rights
    rights = indexmetaTree.xpath('/resource/meta/access-conditions/access/@type')[0]
    # dri identifier 	<dri type="mpiwg">MPIWG:WBGMR64C</dri></meta>
    dri = indexmetaTree.xpath('/resource/meta/dri[@type="mpiwg"]')[0].text

    # insert the creator by using this function
    author_list = get_gnd(author)

    # and export this list which can then be inserted anyway you want
    metadata_list = (lang_list, title, date, author_list, rights, indexmeta2dot0, alt_title, dri)
    return metadata_list
# def parse_old_index_meta ends here


def get_pageimg(echopath):
    """Get the page image listing of a resource via http.

    Because everything is fetched via http, it is not necessary to
    mount Foxridge. First the index.meta of the resource is read to find
    the name of the image directory (/resource/meta/texttool/image, with
    "pageimg" as default). Then the digilib dirInfo-xml.jsp service is
    asked for the content of that directory.

    Args:
        echopath: Path of the resource below the repository root, e. g.
            /permanent/library/UR271U6Y.

    Returns:
        A list containing two lists: the /dir/file/fsname elements and
        the /dir/file/index elements of the digilib answer. The entries
        are lxml elements, not strings; read their ``text`` to get the
        file name or index.

    Raises:
        SystemExit: If one of the URLs cannot be opened or the index.meta
            is not well-formed XML.
    """

    import urllib
    import sys
    from lxml import etree
    import common_functions

    # parse index.meta
    # first, we need the real path to the images, because, it is not necessarily called pageimg
    repo = "http://content.mpiwg-berlin.mpg.de/mpiwg/online/"
    # repo = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=/"
    fullpath = "%s%s/index.meta" % (repo, echopath)

    common_functions.consoleStatus("Trying to get metadata.")

    # Get the file and read it
    try:
        usock = urllib.urlopen(fullpath)
    except IOError as err:
        # Read errno/strerror from the exception object instead of
        # unpacking it in the except clause: the exception is not
        # guaranteed to carry exactly two arguments.
        common_functions.printlnInERROR("I/O error ({0}): {1}".format(err.errno, err.strerror))
        sys.exit()

    try:
        indexmetaTree = etree.parse(usock)
    except etree.XMLSyntaxError:
        common_functions.printlnInERROR("Online resource %s not found. Keep in mind that you must specify the whole path, e. g. /permanent/library/UR271U6Y." % fullpath)
        sys.exit()
    finally:
        # Close the connection on success and on failure alike.
        usock.close()

    image_dirs = indexmetaTree.xpath('/resource/meta/texttool/image')
    if image_dirs:
        pageimg_path = image_dirs[0].text
    else:
        common_functions.consoleWarn('There seems to be no figure tag in the index meta. Using "pageimg" as default. Possibly, the whole "texttool" entry is missing.')
        pageimg_path = "pageimg"

    common_functions.consoleDebug('Path to pageimages: %s.' % pageimg_path)

    pageimg = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/api/dirInfo-xml.jsp?fn=/%s/%s/" % (echopath, pageimg_path)
    common_functions.consoleDebug(pageimg)
    # end of indexmeta parsing

    # Now for the real stuff. We have some debugging information here,
    # because sometimes communicating with the digilib service fails.
    common_functions.consoleDebug("usock=urllib.urlopen(pageimg)")
    try:
        usock = urllib.urlopen(pageimg)
    except IOError as err:
        common_functions.printlnInERROR("I/O error ({0}): {1}".format(err.errno, err.strerror))
        sys.exit()
    common_functions.consoleStatus("Getting pageimages from  %s ..." % echopath)

    # A parse error of the digilib answer is not handled here and
    # reaches the caller; the connection is closed in any case.
    try:
        piTree = etree.parse(usock)
    finally:
        usock.close()

    page_list = piTree.xpath('/dir/file/fsname')
    index_list = piTree.xpath('/dir/file/index')

    return [page_list, index_list]
# def get_pageimg ends here

def get_id(echopath):
    """Parses the echopath and returns the identifier.

    The identifier is the last non-empty segment of the slash-separated
    path, so any number of trailing slashes is tolerated.

    Args:
        echopath: Path of the resource, e. g. /permanent/library/UR271U6Y.

    Returns:
        The last non-empty path segment, or an empty string if the path
        contains no segment at all (empty string or only slashes).
    """
    # str.split replaces the deprecated string.split module function.
    segments = [part for part in echopath.split("/") if part]
    if not segments:
        return ""
    return segments[-1]
# def get_id ends here


def get_unknown(echoorig):
    """Run WS's check for unknown characters on a file.

    The Perl script Filter_2_04_check_unknown_characters.pl is started
    as a subprocess with *echoorig* as its only argument. Whatever the
    script writes to stderr is decoded as UTF-8 and stored in the
    module-level global ``unknown_uni``. Nothing is returned. This is
    done here because TextWrangler's output is garbled at the moment.
    """

    import subprocess
    import common_functions
    global unknown_uni

    common_functions.consoleStatus("Checking text for unknown characters.")

    # the perl script and its argument
    command = ["perl", "Filter_2_04_check_unknown_characters.pl", echoorig]
    checker = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate() waits for the script to end and hands back the pair
    # (stdout, stderr); only the stderr part is of interest here
    _, error_bytes = checker.communicate()

    # decoding to unicode is what makes the output usable, solution is
    # from http://farmdev.com/talks/unicode/
    unknown_uni = error_bytes.decode('utf-8')
# def get_unknown ends here.
