List of usage examples for edu.stanford.nlp.process WordShapeClassifier WORDSHAPEDIGITS
int WORDSHAPEDIGITS
To view the source code for edu.stanford.nlp.process WordShapeClassifier WORDSHAPEDIGITS, click the Source Link.
From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java
License:Open Source License
private void initLexicon(SeqClassifierFlags flags) { if (flags.distSimLexicon == null) { return;/*from www. j ava2 s . c o m*/ } if (lexicon != null) { return; } Timing.startDoing("Loading distsim lexicon from " + flags.distSimLexicon); lexicon = Generics.newHashMap(); boolean terryKoo = "terryKoo".equals(flags.distSimFileFormat); for (String line : ObjectBank.getLineIterator(flags.distSimLexicon, flags.inputEncoding)) { String word; String wordClass; if (terryKoo) { String[] bits = line.split("\\t"); word = bits[1]; wordClass = bits[0]; if (flags.distSimMaxBits > 0 && wordClass.length() > flags.distSimMaxBits) { wordClass = wordClass.substring(0, flags.distSimMaxBits); } } else { // "alexClark" String[] bits = line.split("\\s+"); word = bits[0]; wordClass = bits[1]; } if (!flags.casedDistSim) { word = word.toLowerCase(); } if (flags.numberEquivalenceDistSim) { word = WordShapeClassifier.wordShape(word, WordShapeClassifier.WORDSHAPEDIGITS); } lexicon.put(word, wordClass); } Timing.endDoing(); }
From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java
License:Open Source License
/**
 * Sets {@code CoreAnnotations.DistSimAnnotation} on the tokens of {@code info}
 * using the {@code lexicon} field.
 *
 * <p>Tokens are visited in order. As soon as a token that already carries a
 * {@code DistSimAnnotation} is encountered, the method returns and the
 * remaining tokens are left untouched.
 *
 * <p>For every other token the word from {@code getWord} is lower-cased unless
 * {@code flags.casedDistSim} is set, passed through
 * {@code WordShapeClassifier.wordShape(word, WORDSHAPEDIGITS)} when
 * {@code flags.numberEquivalenceDistSim} is set, and then looked up in
 * {@code lexicon}. A word with no entry receives
 * {@code flags.unknownWordDistSimClass}.
 *
 * @param info the tokens to annotate
 */
private void distSimAnnotate(PaddedList<IN> info) {
  for (CoreLabel token : info) {
    if (token.has(CoreAnnotations.DistSimAnnotation.class)) {
      // An existing annotation ends processing of the whole list, not just this token.
      return;
    }

    String key = getWord(token);
    if (!flags.casedDistSim) {
      key = key.toLowerCase();
    }
    if (flags.numberEquivalenceDistSim) {
      key = WordShapeClassifier.wordShape(key, WordShapeClassifier.WORDSHAPEDIGITS);
    }

    final String known = lexicon.get(key);
    final String cluster = (known != null) ? known : flags.unknownWordDistSimClass;
    token.set(CoreAnnotations.DistSimAnnotation.class, cluster);
  }
}