Mercurial > hg > anteater
view src/classifiers/english.muc.7class.distsim.prop @ 1:7a4341c9f2e5
checking permit numbers for similarity if no direct match
author | jdamerow |
---|---|
date | Fri, 05 Oct 2012 18:52:14 -0700 |
parents | 036535fcd179 |
children |
line wrap: on
line source
# Classifier settings for the english.muc.7class.distsim model (type=crf below).
# One setting per line: the properties format is line-oriented, so several
# "key = value" pairs on one physical line are read as a single key whose
# value is the rest of the line.
#
# NOTE(review): line breaks were reconstructed from a collapsed copy of this
# file. Each '#' is assumed to disable only the single setting that follows
# it - confirm against the upstream copy before retraining.

# Training data, test data, and the file the trained model is written to.
trainFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.train
testFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.test
serializeTo = english.muc.7class.distsim.crf.ser.gz

# Distributional-similarity cluster lexicon and the switch that enables it.
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
useDistSim = true

# Column layout of the data files: presumably column 0 holds the word and
# column 1 the gold label - verify against the reader configured below.
map = word=0,answer=1

saveFeatureIndexToDisk = true

# Feature switches. Disabled settings are kept as comments for reference.
useClassFeature=true
useWord=true
#useWordPairs=true
useNGrams=true
noMidNGrams=true
maxNGramLeng=6
usePrev=true
useNext=true
#useTags=true
#useWordTag=true
useLongSequences=true
useSequences=true
usePrevSequences=true
maxLeft=1
useTypeSeqs=true
useTypeSeqs2=true
useTypeySequences=true
useOccurrencePatterns=true
useLastRealWord=true
useNextRealWord=true
#useReverse=false
normalize=true
# normalizeTimex=true
wordShape=chris2useLC
useDisjunctive=true
disjunctionWidth=5
#useDisjunctiveShapeInteraction=true

# Model type.
type=crf

# Class used to read (and write) the column-formatted documents.
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter

useObservedSequencesOnly=true

# Optimizer settings.
useQN = true
QNsize = 25

# makes it go faster
featureDiffThresh=0.05