Mercurial > hg > anteater
diff src/classifiers/english.all.3class.distsim.prop @ 0:036535fcd179
anteater
| author | jdamerow |
|---|---|
| date | Fri, 14 Sep 2012 10:30:43 +0200 |
| parents | |
| children | |
line wrap: on
line diff
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/classifiers/english.all.3class.distsim.prop Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,52 @@
+trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
+testFile = /u/nlp/data/ner/column_data/conll.testa
+serializeTo = english.all.3class.distsim.crf.ser.gz
+
+type = crf
+
+#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
+#distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
+distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
+useDistSim = true
+
+map = word=0,answer=1
+
+saveFeatureIndexToDisk = true
+
+useClassFeature=true
+useWord=true
+#useWordPairs=true
+useNGrams=true
+noMidNGrams=true
+maxNGramLeng=6
+usePrev=true
+useNext=true
+#useTags=true
+#useWordTag=true
+useLongSequences=true
+useSequences=true
+usePrevSequences=true
+maxLeft=1
+useTypeSeqs=true
+useTypeSeqs2=true
+useTypeySequences=true
+useOccurrencePatterns=true
+useLastRealWord=true
+useNextRealWord=true
+#useReverse=false
+normalize=true
+# normalizeTimex=true
+wordShape=chris2useLC
+useDisjunctive=true
+disjunctionWidth=5
+#useDisjunctiveShapeInteraction=true
+
+readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
+
+useObservedSequencesOnly=true
+
+useQN = true
+QNsize = 25
+
+# makes it go faster
+featureDiffThresh=0.05
```