Example usage for edu.stanford.nlp.process WordShapeClassifier WORDSHAPEDIGITS

List of usage examples for edu.stanford.nlp.process WordShapeClassifier WORDSHAPEDIGITS

Introduction

On this page you can find example usages of edu.stanford.nlp.process WordShapeClassifier WORDSHAPEDIGITS.

Prototype

int WORDSHAPEDIGITS

To view the source code for edu.stanford.nlp.process WordShapeClassifier WORDSHAPEDIGITS, click Source Link.

Usage

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Loads the distributional-similarity lexicon named by {@code flags.distSimLexicon}
 * into the {@code lexicon} map (word to word class).
 *
 * <p>Does nothing when no lexicon file is configured or when {@code lexicon} has
 * already been populated.
 *
 * <p>Two line formats are understood, selected by {@code flags.distSimFileFormat}:
 * <ul>
 *   <li>{@code "terryKoo"}: tab-separated, word class in the first field and the word in
 *       the second; the class is truncated to {@code flags.distSimMaxBits} characters
 *       when that limit is positive;</li>
 *   <li>anything else ("alexClark"): whitespace-separated, word first, word class second.</li>
 * </ul>
 *
 * <p>Each word is lower-cased unless {@code flags.casedDistSim} is set, and passed through
 * {@code WordShapeClassifier.wordShape(word, WORDSHAPEDIGITS)} when
 * {@code flags.numberEquivalenceDistSim} is set, before being stored as the map key.
 *
 * <p>Lines with fewer than two fields (for example blank lines) are skipped and counted;
 * a single warning with the count is written to {@code System.err} after loading.
 *
 * @param flags classifier flags supplying the lexicon path, encoding, file format and
 *              normalisation options
 */
private void initLexicon(SeqClassifierFlags flags) {
    if (flags.distSimLexicon == null || lexicon != null) {
        return;
    }
    Timing.startDoing("Loading distsim lexicon from " + flags.distSimLexicon);
    lexicon = Generics.newHashMap();
    boolean terryKoo = "terryKoo".equals(flags.distSimFileFormat);
    // Compile the field separator once; String.split would recompile it for every line.
    java.util.regex.Pattern separator = java.util.regex.Pattern.compile(terryKoo ? "\\t" : "\\s+");
    int malformedLines = 0;
    for (String line : ObjectBank.getLineIterator(flags.distSimLexicon, flags.inputEncoding)) {
        String[] bits = separator.split(line);
        if (bits.length < 2) {
            // Without this guard a blank or short line throws ArrayIndexOutOfBoundsException
            // and leaves a half-filled, non-null lexicon that later calls would accept as loaded.
            malformedLines++;
            continue;
        }
        String word;
        String wordClass;
        if (terryKoo) {
            word = bits[1];
            wordClass = bits[0];
            if (flags.distSimMaxBits > 0 && wordClass.length() > flags.distSimMaxBits) {
                wordClass = wordClass.substring(0, flags.distSimMaxBits);
            }
        } else {
            // "alexClark"
            word = bits[0];
            wordClass = bits[1];
        }
        if (!flags.casedDistSim) {
            word = word.toLowerCase();
        }
        if (flags.numberEquivalenceDistSim) {
            word = WordShapeClassifier.wordShape(word, WordShapeClassifier.WORDSHAPEDIGITS);
        }
        lexicon.put(word, wordClass);
    }
    Timing.endDoing();
    if (malformedLines > 0) {
        System.err.println("Warning: skipped " + malformedLines
                + " malformed line(s) while loading distsim lexicon " + flags.distSimLexicon);
    }
}

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Sets {@code CoreAnnotations.DistSimAnnotation} on the tokens of {@code info}.
 *
 * <p>Tokens are visited in order. The method returns as soon as it meets a token that
 * already has the annotation, leaving that token and all later ones untouched
 * (presumably because the list was annotated on an earlier call; confirm with callers).
 *
 * <p>The lookup key is the token's word, lower-cased unless {@code flags.casedDistSim}
 * is set and passed through {@code WordShapeClassifier.wordShape(word, WORDSHAPEDIGITS)}
 * when {@code flags.numberEquivalenceDistSim} is set. Words missing from {@code lexicon}
 * receive {@code flags.unknownWordDistSimClass}.
 *
 * @param info the tokens to annotate
 */
private void distSimAnnotate(PaddedList<IN> info) {
    for (CoreLabel token : info) {
        boolean alreadyAnnotated = token.has(CoreAnnotations.DistSimAnnotation.class);
        if (alreadyAnnotated) {
            return;
        }

        // Build the lexicon key from the token's word.
        String key = getWord(token);
        if (!flags.casedDistSim) {
            key = key.toLowerCase();
        }
        if (flags.numberEquivalenceDistSim) {
            key = WordShapeClassifier.wordShape(key, WordShapeClassifier.WORDSHAPEDIGITS);
        }

        // Fall back to the configured unknown-word class when the key is not in the lexicon.
        String wordClass = lexicon.get(key);
        token.set(CoreAnnotations.DistSimAnnotation.class,
                wordClass != null ? wordClass : flags.unknownWordDistSimClass);
    }
}