Example usage for edu.stanford.nlp.ie.ner CMMClassifier makeDatum

List of usage examples for edu.stanford.nlp.ie.ner CMMClassifier makeDatum

Introduction

On this page you can find example usage for edu.stanford.nlp.ie.ner.CMMClassifier.makeDatum.

Prototype

public Datum<String, String> makeDatum(List<IN> info, int loc, List<FeatureFactory<IN>> featureFactories) 

Source Link

Document

Make an individual Datum out of the data list info, focused at position loc.

Usage

From source file: lv.lumii.morphotagger.MorphoPipe.java

License:Open Source License

/**
 * Renders a token list as tab-separated, CoNLL-style text with one token per line.
 *
 * <p>Each emitted line contains, in order: the running token number, the surface form,
 * the lemma, the {@code AnswerAnnotation} value, the tag of the matching wordform, a
 * pipe-delimited feature list, and finally either the saved extra columns (when
 * {@code saveColumns} is set) or the {@code ParentAnnotation} value. Tokens whose text
 * contains {@code <s>} are skipped and do not consume a token number. Spaces inside the
 * surface form, lemma and classifier features are replaced with underscores.
 *
 * <p>When no wordform matches the answer tag, the surface form is repeated in the lemma
 * column and the tag/feature columns are filled with {@code _}.
 *
 * @param tokens the tokens to print
 * @param cmm    classifier used to dump the classifier features of each token when the
 *               {@code features} flag is set
 * @return the formatted text; every line is terminated with {@code eol}
 */
private static String output_CONLL(List<CoreLabel> tokens, CMMClassifier<CoreLabel> cmm) {
    StringBuilder s = new StringBuilder();

    // 1-based number of the emitted line; skipped tokens do not advance it.
    int counter = 1;
    for (int position = 0; position < tokens.size(); position++) {
        CoreLabel word = tokens.get(position);
        String token = word.getString(TextAnnotation.class);
        if (token.contains("<s>")) {
            continue;
        }
        token = token.replace(' ', '_');

        s.append(counter);
        s.append('\t');
        s.append(token);
        s.append('\t');
        Word analysis = word.get(LVMorphologyAnalysis.class);
        Wordform mainwf = analysis.getMatchingWordform(word.getString(AnswerAnnotation.class), false);
        if (mainwf != null) {
            String lemma = mainwf.getValue(AttributeNames.i_Lemma);
            lemma = lemma.replace(' ', '_');
            String answer = word.getString(AnswerAnnotation.class);
            if (answer.trim().isEmpty()) {
                answer = "_"; // no empty tag
            }
            s.append(lemma);
            s.append('\t');
            s.append(answer);
            s.append('\t');
            s.append(mainwf.getTag());
            s.append('\t');

            // Feature attribute filters.
            if (mini_tag) {
                mainwf.removeNonlexicalAttributes();
            }
            if (LETAfeatures) {
                addLETAfeatures(mainwf);
                // mainwf.removeAttribute(AttributeNames.i_SourceLemma);
                // FIXME - for derived words this attribute may be important (de-prefixed lemmas..)
                mainwf.removeTechnicalAttributes();
            }

            s.append(mainwf.pipeDelimitedEntries()); // add the word's features

            if (features) { // all the features that were used during training
                // makeDatum expects the token's position in the list, not the output
                // line number: the two diverge whenever a token has been skipped
                // anywhere other than exactly once before this one.
                Datum<String, String> d = cmm.makeDatum(tokens, position, cmm.featureFactory);
                for (String feature : d.asFeatures()) {
                    // Drop the last two characters (the trailing "|C" the features carry).
                    s.append(feature.substring(0, feature.length() - 2).replace(' ', '_'));
                    s.append('|');
                }
            }
            // Remove the superfluous final '|' separator, but only if there is one;
            // otherwise we would eat the column-separating tab written above.
            int last = s.length() - 1;
            if (last >= 0 && s.charAt(last) == '|') {
                s.deleteCharAt(last);
            }
            s.append('\t');

        } else {
            s.append(token);
            s.append("\t_\t_\t_\t");
        }
        if (saveColumns) {
            s.append(word.getString(ExtraColumnAnnotation.class));
        } else {
            String syntax = word.getString(ParentAnnotation.class);
            // CoreLabel.getString yields "" (not null) for a missing key, so an empty
            // value must also fall through to the placeholder columns.
            if (syntax != null && !syntax.isEmpty()) {
                s.append(syntax);
            } else {
                s.append("_\t_\t_\t_");
            }
        }
        s.append(eol);
        counter++;
    }

    return s.toString();
}