List of usage examples for edu.stanford.nlp.ie.ner.CMMClassifier.makeDatum
public Datum<String, String> makeDatum(List<IN> info, int loc, List<FeatureFactory<IN>> featureFactories)
From source file: lv.lumii.morphotagger.MorphoPipe.java
License: Open Source License
private static String output_CONLL(List<CoreLabel> tokens, CMMClassifier<CoreLabel> cmm) { StringBuilder s = new StringBuilder(); int counter = 1; for (CoreLabel word : tokens) { String token = word.getString(TextAnnotation.class); if (token.contains("<s>")) continue; token = token.replace(' ', '_'); s.append(Integer.toString(counter)); s.append('\t'); s.append(token);// w w w . j a v a2s. c o m s.append('\t'); Word analysis = word.get(LVMorphologyAnalysis.class); Wordform mainwf = analysis.getMatchingWordform(word.getString(AnswerAnnotation.class), false); if (mainwf != null) { String lemma = mainwf.getValue(AttributeNames.i_Lemma); lemma = lemma.replace(' ', '_'); String answer = word.getString(AnswerAnnotation.class); if (answer.trim().isEmpty()) answer = "_"; // no empty tag s.append(lemma); s.append('\t'); s.append(answer); s.append('\t'); s.append(mainwf.getTag()); s.append('\t'); // Feature atribtu filtri if (mini_tag) mainwf.removeNonlexicalAttributes(); if (LETAfeatures) { addLETAfeatures(mainwf); // mainwf.removeAttribute(AttributeNames.i_SourceLemma); FIXME - atvasin?tiem v?rdiem is var bt svargs, atpriedekotas lemmas.. mainwf.removeTechnicalAttributes(); } s.append(mainwf.pipeDelimitedEntries()); // Pievienojam v?rda f?as if (features) { // visas f?as, ko lietoja trenjot Datum<String, String> d = cmm.makeDatum(tokens, counter, cmm.featureFactory); for (String feature : d.asFeatures()) { s.append(feature.substring(0, feature.length() - 2).replace(' ', '_')); // noeam trailing |C kas t?m f??m tur ir s.append('|'); } } s.deleteCharAt(s.length() - 1); // noemam peedeejo | separatoru, kas ir lieks s.append('\t'); } else { s.append(token); s.append("\t_\t_\t_\t"); } if (saveColumns) { s.append(word.getString(ExtraColumnAnnotation.class)); } else { String syntax = word.getString(ParentAnnotation.class); if (syntax != null) { s.append(syntax); } else s.append("_\t_\t_\t_"); } s.append(eol); counter++; } return s.toString(); }