annotate harvestmeta.pl @ 0:30497c6a3eca

Initial revision
author casties
date Thu, 17 Jun 2004 17:58:42 +0200
parents
children 1a51f94d5dbd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
1 #!/usr/local/bin/perl -w
30497c6a3eca Initial revision
casties
parents:
diff changeset
2
30497c6a3eca Initial revision
casties
parents:
diff changeset
3 use strict;
30497c6a3eca Initial revision
casties
parents:
diff changeset
4 use XML::SAX;
30497c6a3eca Initial revision
casties
parents:
diff changeset
5 use DBI;
30497c6a3eca Initial revision
casties
parents:
diff changeset
6
30497c6a3eca Initial revision
casties
parents:
diff changeset
7 use lib '/usr/local/mpiwg/archive';
30497c6a3eca Initial revision
casties
parents:
diff changeset
8 use MPIWGStor;
30497c6a3eca Initial revision
casties
parents:
diff changeset
9 use HarvestmetaHandler;
30497c6a3eca Initial revision
casties
parents:
diff changeset
10
30497c6a3eca Initial revision
casties
parents:
diff changeset
# Turn off output buffering so that log lines show up immediately.
$| = 1;

#######################################################
# internal parameters
#

# Version string, reported in the start-up log line.
my $version = "0.1 (08.06.2004)";

# Command line parameters; used below as a hash reference.
my $args = MPIWGStor::parseargs();

# Debug level: value of the 'debug' parameter, 0 if it was not given.
# NOTE(review): $debug is not declared in this file -- presumably it is
# exported by MPIWGStor; confirm.
$debug = exists $args->{'debug'} ? $args->{'debug'} : 0;

# XML namespace (not really implemented!)
my $namespace = "";

# True if the 'purgedb' parameter was given: delete and rebuild database.
my $purgeDB = exists $args->{'purgedb'};

# Database connection; give up right away if it cannot be established.
my $dbh = DBI->connect("dbi:Pg:dbname=storage", "archiver", "");
unless ($dbh) {
    logger('ABORT', "unable to connect to database!");
    exit 1;
}
# All changes are committed explicitly with $dbh->commit.
$dbh->{AutoCommit} = 0;

# Prepared statement handles; they are filled in by initDB().
my (
    $dbNextFileId,
    $dbNewFile,
    $dbNewMeta,
    $dbClearMeta,
    $dbFindFileName,
    $dbFindFilePath,
    $dbClearFile,
    $dbFindFileFlag,
    $dbFindFileFlagPath,
    $dbSetFileFlag,
    $dbClearAllFileFlag,
);
30497c6a3eca Initial revision
casties
parents:
diff changeset
#######################################################
# check parameters that were passed to the program
#

# Root directory of the document tree to harvest ('path' parameter).
my $basedir = $$args{'path'};
if (! $basedir) {
    logger("ABORT", "no document directory given!");
    exit 1;
}
# Strip *all* trailing slashes (the old pattern removed only one), so
# that paths built as "$basedir/..." never start with a doubled
# separator.  A path consisting only of slashes becomes empty and is
# rejected by the directory check below, as before.
$basedir =~ s{/+$}{};
if (! -d $basedir) {
    logger("ABORT", "document directory \'$basedir\' doesn't exist!");
    exit 1;
}

# SAX handler that collects the contents of an index file, and the
# parser that feeds it.
my $metaParserHandler = HarvestmetaHandler->new;
my $metaParser = XML::SAX::ParserFactory->parser(Handler => $metaParserHandler);
30497c6a3eca Initial revision
casties
parents:
diff changeset
69
30497c6a3eca Initial revision
casties
parents:
diff changeset
#######################################################
# internal variables
#

# Counters for errors and warnings, reported at the end of the run.
my ($errcnt, $warncnt) = (0, 0);

# Counters for directory entries seen on the file system and for
# index files handled.
my ($fcnt, $idxcnt) = (0, 0);
30497c6a3eca Initial revision
casties
parents:
diff changeset
83
30497c6a3eca Initial revision
casties
parents:
diff changeset
84 #######################################################
30497c6a3eca Initial revision
casties
parents:
diff changeset
85 # subroutines
30497c6a3eca Initial revision
casties
parents:
diff changeset
86 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
87
30497c6a3eca Initial revision
casties
parents:
diff changeset
#
# $cnt = readAllFiles($directory, $basedir)
#
# Recursively walks the directory tree below $directory and calls
# harvestFile() for every plain file named "index.meta". Names starting
# with a dot and names listed in %junk_files are skipped.
#
# $basedir is the path of $directory relative to the starting point. It
# is only used for the recursion and should be empty when called from
# outside.
#
# Returns the number of directory entries (files and directories) seen
# below $directory, or 0 if $directory cannot be opened. The global
# counter $fcnt is incremented for every entry as well.
#
sub readAllFiles {
    my ($directory, $basedir) = @_;
    my $cnt = 0;

    # Lexical handle: every recursion level gets its own directory
    # handle instead of re-opening one shared global bareword handle.
    my $dh;
    if (! opendir $dh, $directory) {
        return 0;
    }
    # Read all entries and release the handle before recursing, so that
    # only one directory is open at any time.
    my @dirfiles = readdir $dh;
    closedir $dh;

    foreach my $fn (@dirfiles) {
        # ignore names starting with a dot
        next if ($fn =~ /^\./);
        # ignore other silly files
        next if ($junk_files{$fn});

        $cnt++;
        $fcnt++;
        my $f = "$directory/$fn";
        my $docf = ($basedir) ? "$basedir/$fn" : $fn;
        #logger('DEBUG', "fs_file: \"$f\"");
        if (-f $f) {
            #logger(" is file");
            if ($fn eq "index.meta") {
                harvestFile($fn, $directory);
            }
        } elsif (-d _) {
            # "_" reuses the stat result of the -f test above
            #logger(" is dir");
            # recurse into directory
            $cnt += readAllFiles($f, $docf);
        }
    }
    return $cnt;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
128
30497c6a3eca Initial revision
casties
parents:
diff changeset
#
# cleanUnmarkedFiles($basepath)
#
# Deletes the file and meta entries of all files below $basepath that
# have no entry in file_flags, i.e. that were not flagged by
# registerFile() during this run. Every deletion is committed
# separately.
#
sub cleanUnmarkedFiles {
    my ($basepath) = @_;

    # Build a LIKE pattern that matches only entries *below* $basepath:
    #  - trailing slashes are dropped and exactly one "/" is appended, so
    #    that "/docs/a" no longer matches the sibling "/docs/ab/...";
    #  - the LIKE wildcards "%" and "_" (and the escape character "\")
    #    are escaped so that the path itself is matched literally.
    (my $pattern = $basepath) =~ s{/+$}{};
    $pattern =~ s/([\\%_])/\\$1/g;

    $dbFindFileFlagPath->execute("$pattern/%");
    my $ids = $dbFindFileFlagPath->fetchall_arrayref;
    for my $row (@$ids) {
        my $id = $$row[0];
        logger('DEBUG', "cleaning file and meta of id: $id");
        $dbClearMeta->execute($id);
        $dbClearFile->execute($id);
        $dbh->commit;
    }
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
146
30497c6a3eca Initial revision
casties
parents:
diff changeset
#
# harvestFile($filename, $filepath)
#
# Reads the index file $filename in directory $filepath and puts its
# contents in the database. The file is parsed only if registerFile()
# returns a file ID, i.e. if it is new or modified. Increments the
# global counter $idxcnt and logs the progress every 10 index files.
#
sub harvestFile {
    my ($filename, $filepath) = @_;
    my $fullpath = "$filepath/$filename";
    logger('DEBUG', "looking at file '$filename' at '$filepath'");
    # get file time (element 9 of the stat list is the modification time)
    my $mtime = (stat($fullpath))[9];
    my $filetime = stime($mtime);
    # register file in db
    my $fid = registerFile($fullpath, $filetime);
    if ($fid) {
        # file is new/modified
        # parse index file
        $metaParser->parse_uri($fullpath);
        my @data = $metaParserHandler->getData();
        # interpolate the real element count ("$#data+1" inside a string
        # would print something like "4+1")
        my $count = scalar @data;
        logger('DEBUG', "parsed $count elements");
        registerMeta($fid, @data);
    }
    $idxcnt++;
    logger('INFO', "$idxcnt index files of $fcnt") if ($idxcnt % 10 == 0) ;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
174
30497c6a3eca Initial revision
casties
parents:
diff changeset
#
# $fileid = registerFile($filepath, $filetime)
#
# Returns the file ID for the file $filepath. If necessary it
# will be added to the database. Returns 0 if an update is not necessary,
# i.e. if the time stored in the database is not older than $filetime.
#
# In every case the file is flagged in file_flags, so that
# cleanUnmarkedFiles() leaves it alone.
#
# For a file that is already known but has been modified, the old meta
# entries are deleted and the stored mtime is updated, so that the
# caller can add the fresh metadata without creating duplicates.
#
sub registerFile {
    my ($filepath, $filetime) = @_;
    # look if file is in db
    $dbFindFileName->execute($filepath);
    my ($fileid, $mtime) = $dbFindFileName->fetchrow_array;
    if ($fileid) {
        # file is in db
        # update flag
        $dbSetFileFlag->execute($fileid, 1);
        $dbh->commit;
        my $stime = s2stime($mtime);
        if ($stime ge $filetime) {
            # if its current return 0
            logger('DEBUG', "file: $fileid is old! time: '$stime' (vs '$filetime')");
            return 0;
        }
        logger('DEBUG', "file: $fileid is new! time: '$stime' (vs '$filetime')");
        # Drop the stale metadata and remember the new file time.
        # Without this every run would append another copy of the
        # metadata, because the stored mtime stayed older than the file.
        # This is deliberately not committed here: it becomes permanent
        # together with the new metadata when registerMeta() commits, so
        # an abort while parsing does not lose the old entries.
        $dbClearMeta->execute($fileid);
        $dbh->do("update files set mtime=? where id=?", undef, $filetime, $fileid);
        return $fileid;
    }

    # file is not in db: get a new file id
    $dbNextFileId->execute;
    ($fileid) = $dbNextFileId->fetchrow_array;
    logger('DEBUG', "DB newfile: id=$fileid filename=$filepath mtime=$filetime");
    $dbNewFile->execute($fileid, $filepath, $filetime);
    # update flag
    $dbSetFileFlag->execute($fileid, 1);
    $dbh->commit;
    return $fileid;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
214
30497c6a3eca Initial revision
casties
parents:
diff changeset
#
# registerMeta($fileid, @meta)
#
# Stores the metadata entries @meta for the file $fileid in the
# database. Each entry is an array reference; elements 0, 2 and 1 are
# written (in that order) together with the file ID and the running
# position of the entry. All inserts are committed at the end.
#
sub registerMeta {
    my ($fileid, @meta) = @_;
    logger('DEBUG', "DB newmeta: fileid=$fileid ($#meta)");
    my $idx = 0;
    while ($idx < @meta) {
        my $keyval = $meta[$idx];
        #logger('DEBUG', " DB meta: $$keyval[0]=$$keyval[1]");
        $dbNewMeta->execute($fileid, $idx, $keyval->[0], $keyval->[2], $keyval->[1]);
        $idx++;
    }
    $dbh->commit;
    logger('INFO', "added $idx elements (file $fileid)");
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
231
30497c6a3eca Initial revision
casties
parents:
diff changeset
#
# initDB()
#
# Prepares the database for a harvesting run:
#  - if $purgeDB is set, deletes all rows of the tables files and meta;
#  - creates the temporary table file_flags, which records the files
#    flagged during this run;
#  - prepares the statement handles used by the other subroutines.
#
# Logs an ABORT message and exits with status 1 if the tables cannot be
# cleaned or the temporary table cannot be created.
#
sub initDB {
    # clean tables
    if ($purgeDB) {
        # Check every statement on its own: do() returns undef on
        # failure, whereas $dbh->err only reflects the most recent call.
        foreach my $table ('files', 'meta') {
            if (! defined $dbh->do("delete from $table")) {
                logger('ABORT', "unable to clean table!");
                exit 1;
            }
        }
        $dbh->commit;
    }

    # create the (initially empty) flag table for this run
    if (! defined $dbh->do("create temporary table file_flags ( fileid integer primary key, flag integer )")) {
        logger('ABORT', "unable to create table file_flags!");
        exit 1;
    }
    $dbh->commit;

    # prepare statements
    $dbNextFileId = $dbh->prepare("select nextval('files_id_seq')");
    $dbNewFile = $dbh->prepare("insert into files (id, filename, mtime) values (?,?,?)");
    $dbFindFileName = $dbh->prepare("select id,mtime from files where filename=?");
    # NOTE(review): this selects a column "flag" from files, while the
    # flags live in file_flags; the handle is not executed anywhere in
    # this file -- confirm whether it is still needed.
    $dbFindFilePath = $dbh->prepare("select id,filename,flag from files where filename like ?");
    $dbClearFile = $dbh->prepare("delete from files where id=?");
    $dbFindFileFlag = $dbh->prepare("select fileid from file_flags where flag=?");
    # files matching a path pattern that have no flag entry
    $dbFindFileFlagPath = $dbh->prepare("select id from files left outer join file_flags on files.id=file_flags.fileid where filename like ? and flag is null");
    $dbSetFileFlag = $dbh->prepare("insert into file_flags (fileid, flag) values (?,?)");
    $dbNewMeta = $dbh->prepare("insert into meta (fileid, idx, tags, attributes, content) values (?,?,?,?,?)");
    $dbClearMeta = $dbh->prepare("delete from meta where fileid=?");

}
30497c6a3eca Initial revision
casties
parents:
diff changeset
267
30497c6a3eca Initial revision
casties
parents:
diff changeset
#######################################################
# main
#

logger("INFO", "harvestmeta $version");

initDB();

# walk the tree below $basedir and harvest every index file found
my $fnum = readAllFiles($basedir, "");
# remove database entries (under $basedir) that were not flagged in this run
cleanUnmarkedFiles($basedir);

# final report; the exit status is 1 if any errors were counted
logger("INFO", "analysed $idxcnt of $fnum files!");
logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
unless ($errcnt > 0) {
    logger("DONE", "all index files read successfully!");
}
else {
    logger("ABORT", "there were errors!");
    exit 1;
}