Mercurial > hg > openmind
view src/main/java/org/mpi/openmind/scripts/Indexmeta.java @ 90:4b6c0b368f46
new UpdateMpiwgDigitalizations script.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 29 May 2018 21:15:06 +0200 |
parents | 615d27dce9b3 |
children |
line wrap: on
line source
package org.mpi.openmind.scripts; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import org.apache.commons.lang.StringUtils; import org.mpi.openmind.cache.WrapperService; import org.mpi.openmind.repository.bo.Attribute; import org.mpi.openmind.repository.bo.Entity; import org.mpi.openmind.repository.bo.Node; import org.mpi.openmind.repository.services.ServiceRegistry; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathFactory; /** * This script should do mainly two things: * 1. Create an attribute called mpiwg_id * 2. Create an attribute called indexmeta_folder * * * @author jurzua * */ public class Indexmeta { //http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=/permanent/library/SWE97E52/index.meta private static String ECHO_URL = "http://echo.mpiwg-berlin.mpg.de"; private static String ATT_INDEXMETA_FOLDER = "indexmeta_folder"; private static String ATT_MPIWG_ID = "mpiwg_id"; private static String CODEX = "CODEX"; public static String userName = "indexmeta-script"; private static void saveMpiwgIds(List<Identifier> mpiwgIdList, WrapperService ws) throws Exception{ for(Identifier item : mpiwgIdList){ System.out.println(item); Entity codex = ws.getEntityByIdWithContent(item.ismiId); if(codex == null) throw new Exception("Codex " + item.ismiId + " was not found."); //Setting indexmeta_folder if(codex.getAttributeByName(ATT_INDEXMETA_FOLDER) == null){ Attribute att = new Attribute(ATT_INDEXMETA_FOLDER, "text", item.indexmeta); codex.addAttribute(att); }else{ codex.getAttributeByName(ATT_INDEXMETA_FOLDER).setValue(item.indexmeta); } //Setting mpiwg_id if(codex.getAttributeByName(ATT_MPIWG_ID) == null){ Attribute att = new Attribute(ATT_MPIWG_ID, "text", item.mpiwgId); codex.addAttribute(att); }else{ codex.getAttributeByName(ATT_MPIWG_ID).setValue(item.mpiwgId); } ws.saveAssertion(codex, userName); } } private static void prepareDefinitions(WrapperService ws) throws Exception{ Entity codex = ws.getDefinition(CODEX); Attribute attIndexmetaFolder = ws.getDefAttributeByOwnValue(CODEX, ATT_INDEXMETA_FOLDER); if(attIndexmetaFolder == null){ attIndexmetaFolder = new Attribute(Node.TYPE_TBOX, "text", ATT_INDEXMETA_FOLDER); attIndexmetaFolder.setSourceId(codex.getId()); attIndexmetaFolder.setSourceObjectClass(Node.TYPE_TBOX); attIndexmetaFolder.setSourceModif(codex.getModificationTime()); attIndexmetaFolder.setSystemStatus(Node.SYS_STATUS_CURRENT_VERSION); ws.saveDefAttribute(attIndexmetaFolder, userName); } Attribute attMpiwgwgId = ws.getDefAttributeByOwnValue(CODEX, ATT_MPIWG_ID); if(attMpiwgwgId == null){ attMpiwgwgId = new Attribute(Node.TYPE_TBOX, "text", ATT_MPIWG_ID); attMpiwgwgId.setSourceId(codex.getId()); attMpiwgwgId.setSourceObjectClass(Node.TYPE_TBOX); attMpiwgwgId.setSourceModif(codex.getModificationTime()); attMpiwgwgId.setSystemStatus(Node.SYS_STATUS_CURRENT_VERSION); ws.saveDefAttribute(attMpiwgwgId, userName); } } private static List<Identifier> readFile(){ List<Identifier> rs = new ArrayList<Indexmeta.Identifier>(); try{ Scanner scan = new Scanner( new File("/Users/jurzua/Projects/ISMI/workspace/ismi-richfaces/docs/echo/archiv_id_ismi_online_2014.05.23.tab")); String line=""; //int readline = Integer.parseInt(scan.nextLine());// while (scan.hasNextLine()) { line = scan.nextLine(); Identifier id = new Identifier(line); if(id.isValid){ rs.add(id); } } }catch(Exception e){ e.printStackTrace(); } return rs; } public static class Identifier{ public Long ismiId; public String indexmeta; public String mpiwgId; public String label; public boolean isValid = false; public Identifier(String line) throws Exception{ String[] split = line.split("\t"); if(split.length >= 3){ this.indexmeta = split[0]; this.label = split[1]; this.ismiId = Long.parseLong(split[2]); String link0 = indexmeta.replace("/mpiwg/online", ""); String link = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=" + link0 + "/index.meta"; this.mpiwgId = getMpiwgId(link); this.isValid = (StringUtils.isNotEmpty(mpiwgId) && ismiId != null); } } @Override public String toString(){ return label + "\t[" + indexmeta + ", " + ismiId + ", " + mpiwgId + "]"; } } public static void execute(){ ServiceRegistry sr = new ServiceRegistry(); try { prepareDefinitions(sr.getWrapper()); saveMpiwgIds(readFile(), sr.getWrapper()); } catch (Exception e) { e.printStackTrace(); } } public static void main(String[] args) { execute(); System.exit(0); } private static Document getDocument(String link){ try { Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(link); return doc; } catch (Exception e) { e.printStackTrace(); } return null; } public static String getMpiwgId(String link) throws Exception{ Document doc = getDocument(link); XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); return (String) xpath.compile("//resource/meta/dri[@type='mpiwg']").evaluate(doc, XPathConstants.STRING); } }