Mercurial > hg > purlService
view harvestToPurl.py @ 8:733d43b30a82
connection handling changed
searcher added
author | dwinter |
---|---|
date | Fri, 02 Nov 2012 11:34:23 +0100 |
parents | 107f13ca333b |
children | 1b2d74f94ca8 |
line wrap: on
line source
'''
Walk a directory tree and register every ``*.meta`` file it contains with
the IndexMetaPURLManager from ``manageIndexMetaPURLs``.

Created on 31.10.2012

@author: dwinter
'''

import manageIndexMetaPURLs
import os
from os.path import join, getsize
import sys
import re
from lxml import etree


def _replacePrefix(value, prefix, replacement):
    """Return *value* with a leading literal *prefix* swapped for *replacement*.

    The prefix is compared literally (not as a regular expression), so path
    characters such as ``.``, ``+`` or ``(`` cannot change what is matched.
    *value* is returned unchanged when it does not start with *prefix*.
    """
    if value.startswith(prefix):
        return replacement + value[len(prefix):]
    return value


def harvestIndexMeta(path, user, delpath="", replacepath="", update=False):
    """Register every ``*.meta`` file found below *path*.

    For each matching file an image path is derived with
    ``createImagePath``; then the leading *delpath* of both the file path and
    the image path is replaced by *replacepath* before they are handed to
    ``IndexMetaPURLManager.register``.  The outcome of each registration is
    printed.

    Directories named ``pageimg`` and directories whose name starts with
    ``.`` or ``:`` are not descended into.

    :param path: root directory to walk.
    :param user: passed through to ``register`` as ``user``.
    :param delpath: literal path prefix to strip from the stored paths.
    :param replacepath: text put in place of *delpath*.
    :param update: passed through to ``register`` as ``update``.
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".meta"):
                continue

            fl = join(root, name)
            # The image path has to be derived from the real on-disk
            # location, i.e. before the prefix is rewritten.
            imagePath = createImagePath(fl, root)

            # Remove the part of the path that starts with delpath.
            imagePath = _replacePrefix(imagePath, delpath, replacepath)
            fl = _replacePrefix(fl, delpath, replacepath)

            # NOTE(review): the meaning of the positional ``True`` is defined
            # by IndexMetaPURLManager.register - confirm there.
            val, purl = md.register(fl, True, user=user,
                                    imagePath=imagePath, update=update)

            # print() with one pre-formatted string behaves the same under
            # Python 2 and Python 3.
            try:
                if val == manageIndexMetaPURLs.ALREADY_EXISTS:
                    print("found %s -> %s" % (fl, purl))
                elif val == manageIndexMetaPURLs.UPDATED:
                    print("updated %s -> %s" % (fl, purl))
                else:
                    print("added %s -> %s" % (fl, purl))
            except Exception:
                # Best effort: a path that cannot be printed must not abort
                # the harvest.
                print("cannot print: %s" % purl)

        # Prune in place so os.walk skips these directories.  Slice
        # assignment is used instead of remove() inside a loop over ``dirs``,
        # because removing while iterating skips the following entry.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith((".", ":"))]


def createImagePath(path, root):
    """Return an image folder for the meta file *path*, or ``""``.

    Creates an image path only when the file has no ``texttool`` element:
    the file is parsed as XML, and if it cannot be parsed or contains a
    ``texttool`` element the result is the empty string.  Otherwise the
    first of ``root/pageimg`` and ``root/pages`` that exists is returned,
    or ``""`` when neither exists.

    :param path: path of the meta file to inspect.
    :param root: directory in which the image folders are looked up.
    """
    print("parsing: %s" % path)
    try:
        tree = etree.parse(path)
    except Exception:
        # Unreadable or malformed files are reported and skipped.
        print("cannot parse %s" % path)
        return ""

    # If there is a texttool tag, no imagePath is generated.
    if tree.xpath('//texttool'):
        return ""

    # Otherwise fall back to a heuristic: use the first known image folder
    # that exists next to the meta file.
    for imageFolder in ("pageimg", "pages"):
        candidate = join(root, imageFolder)
        if os.path.exists(candidate):
            return candidate

    return ""


if __name__ == '__main__':
    args = sys.argv[1:]
    if not 2 <= len(args) <= 4:
        print("USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted")
        sys.exit(2)

    path = args[0]
    user = args[1]
    # Both prefix arguments are optional and default to the empty string.
    delpath = args[2] if len(args) >= 3 else ""
    replacepath = args[3] if len(args) == 4 else ""

    if not os.path.exists(path):
        print("ERROR: path %s does not exist!" % path)
        sys.exit(2)

    harvestIndexMeta(path, user, delpath=delpath, replacepath=replacepath,
                     update=True)