view doOCR_old.py @ 2:90c0df483890 default tip

inital3
author dwinter
date Tue, 19 Aug 2014 14:25:29 +0200
parents 5b7ed10ecbb4
children
line wrap: on
line source

from _csv import Error
# Root of the directory tree that is searched for PDF files.
# Previously used root (kept for reference): "/Volumes/Folivora/MPG_Archiv/"
start = "/Volumes/Folivora/MPG_Archiv/Publikationen der MPG"
# Directory below which one folder per PDF is created.
target = "/Volumes/MPGARCHIV/struct2/"

# When true, the name of the folder that contains a PDF is put in front of
# the PDF's own name when the destination folder name is built.
pathAdd = True

import os
import os.path
import shutil

# Copy every PDF found below `start` into its own folder under `target`.
#
# For a source file ".../<folder>/<name>.pdf" the layout produced is
#     <target>/[<folder>___]<name>/full/<name>.pdf
# with blanks in <folder> and <name> replaced by underscores; the
# "<folder>___" prefix is only used when `pathAdd` is true.  A PDF whose
# destination folder already exists is reported on stdout and skipped.
# Source paths that could not be copied are written, one per line, to
# /tmp/error.out.
#
# The `with` block guarantees the error log is flushed and closed even if
# the walk is aborted by an exception or Ctrl-C.
with open("/tmp/error.out", "w", encoding="utf-8") as errorf:
    # Terminate the marker line so the first logged path starts on a line
    # of its own instead of being glued to "START".
    errorf.write("START\n")

    for root, dirs, files in os.walk(start, topdown=False):
        for name in files:
            path = os.path.join(root, name)
            print(name)

            # Only files with a (lower-case) ".pdf" extension are handled.
            if os.path.splitext(path)[1] != ".pdf":
                continue

            cleanName = name.replace(" ", "_")
            if pathAdd:
                # fld[1] is the last component of `root`, i.e. the folder
                # that directly contains the PDF.
                fld = os.path.split(root)
                print(fld)
                neu = os.path.join(target, fld[1].replace(" ", "_") + "___" + cleanName)
            else:
                neu = os.path.join(target, cleanName)

            # The destination folder is named like the PDF, minus extension.
            pathNeu = os.path.splitext(neu)[0]

            if os.path.exists(pathNeu):
                print("EXISTS:" + path)
                continue

            fullDir = os.path.join(pathNeu, "full")
            try:
                # makedirs also creates pathNeu itself as an intermediate.
                os.makedirs(fullDir)
                shutil.copy(path, os.path.join(fullDir, cleanName))
            except OSError:
                # Filesystem failures (including shutil.Error, an OSError
                # subclass) are logged and the run continues with the next
                # file.  KeyboardInterrupt/SystemExit are deliberately not
                # caught, so the batch can still be interrupted.
                errorf.write(path + "\n")

            print(path)
            
        
        
    #for name in dirs:
    #    print(os.path.join(root, name))