List of usage examples for the edu.stanford.nlp.util.Triple constructor
public Triple(T1 first, T2 second, T3 third)
From source file:de.l3s.workive.analysis.ner.GermanNER.java
public List<Entity> extractEntities(CoreMap sentence) { List<Entity> entityList = new ArrayList<Entity>(); CoreLabel prevEntity = null;/*w w w . j a v a2s. c om*/ String tag = ""; for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String entityTag = token.get(NamedEntityTagAnnotation.class); //System.out.println(entityTag); if (entityTag.compareToIgnoreCase("I-ORG") == 0 || entityTag.compareToIgnoreCase("I-PER") == 0 || entityTag.compareToIgnoreCase("I-LOC") == 0 || entityTag.compareToIgnoreCase("MISC") == 0) { if (prevEntity != null) { if (prevEntity.get(NamedEntityTagAnnotation.class).compareToIgnoreCase(entityTag) == 0 && prevEntity.endPosition() == token.beginPosition() - 1) { prevEntity.setEndPosition(token.endPosition()); prevEntity.set(TextAnnotation.class, prevEntity.get(TextAnnotation.class) + " " + token.get(TextAnnotation.class)); } else { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); prevEntity = token; tag = entityTag; } } else { prevEntity = token; tag = entityTag; } } } if (prevEntity != null) { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); tag = ""; } return entityList; }
From source file:edu.stanford.muse.index.NEROld.java
License:Apache License
public static String retainOnlyNames(String text, List<Triple<String, Integer, Integer>> offsets) { if (offsets == null) return retainOnlyNames(text); // be forgiving int len = text.length(); offsets.add(new Triple<String, Integer, Integer>(null, len, len)); // sentinel int prev_name_end_pos = 0; // pos of first char after previous name StringBuilder result = new StringBuilder(); for (Triple<String, Integer, Integer> t : offsets) { int begin_pos = t.second(); int end_pos = t.third(); if (begin_pos > len || end_pos > len) { // TODO: this is unclean. currently happens because we concat body & title together when we previously generated these offsets but now we only have body. begin_pos = end_pos = len;//from ww w . j a va 2s . c o m } String filler = text.substring(prev_name_end_pos, begin_pos); //filler = filler.replaceAll("\\w", "."); // CRITICAL: \w only matches (redacts) english language filler = filler.replaceAll("[^\\p{Punct}\\s]", "."); result.append(filler); result.append(text.substring(begin_pos, end_pos)); prev_name_end_pos = end_pos; } return result.toString(); }
From source file:es.dmr.flink.nlp.StanfordNLPCoreExtractor.java
/**
 * Runs the classifier over {@code text} and returns only the spans labelled {@code PERSON}.
 *
 * @param text the text to classify
 * @return one triple per person span, holding the covered substring of {@code text} followed by
 *         the span's begin and end character offsets as reported by the classifier; empty when
 *         no span carries the {@code PERSON} label
 */
public List<Triple<String, Integer, Integer>> getPersonMarkers(String text) {
    List<Triple<String, Integer, Integer>> personsOnlyList = new ArrayList<>();
    List<Triple<String, Integer, Integer>> list = classifier.classifyToCharacterOffsets(text);
    for (Triple<String, Integer, Integer> item : list) {
        // Constant-first equals: a span without a label is skipped instead of throwing.
        if (PERSON.equals(item.first())) {
            String name = text.substring(item.second(), item.third());
            personsOnlyList.add(new Triple<>(name, item.second(), item.third()));
        }
    }
    return personsOnlyList;
}
From source file:LVCoref.Document.java
License:Open Source License
private static List<Triple<Integer, Integer, String>> getLabelledSpans(Sentence sent, int fieldIndex, String defaultMarker, boolean checkEndLabel) { List<Triple<Integer, Integer, String>> spans = new ArrayList<Triple<Integer, Integer, String>>(); Stack<Triple<Integer, Integer, String>> openSpans = new Stack<Triple<Integer, Integer, String>>(); for (int wordPos = 0; wordPos < sent.getSize(); wordPos++) { String val = sent.getNode(wordPos).getField(fieldIndex); if (!defaultMarker.equals(val)) { int openParenIndex = -1; int lastDelimiterIndex = -1; for (int j = 0; j < val.length(); j++) { char c = val.charAt(j); boolean isDelimiter = false; if (c == '(' || c == ')' || c == '|') { if (openParenIndex >= 0) { String s = val.substring(openParenIndex + 1, j); // if (removeStar) { // s = starPattern.matcher(s).replaceAll(""); // } openSpans.push(new Triple<Integer, Integer, String>(wordPos, -1, s)); openParenIndex = -1; }/* w ww . ja v a2 s .c om*/ isDelimiter = true; } if (c == '(') { openParenIndex = j; } else if (c == ')') { Triple<Integer, Integer, String> t = openSpans.pop(); if (checkEndLabel) { // NOTE: end parens may cross (usually because mention either start or end on the same token // and it is just an artifact of the ordering String s = val.substring(lastDelimiterIndex + 1, j); if (!s.equals(t.third())) { Stack<Triple<Integer, Integer, String>> saved = new Stack<Triple<Integer, Integer, String>>(); while (!s.equals(t.third())) { // find correct match saved.push(t); if (openSpans.isEmpty()) { throw new RuntimeException("Cannot find matching labelled span for " + s); } t = openSpans.pop(); } while (!saved.isEmpty()) { openSpans.push(saved.pop()); } assert (s.equals(t.third())); } } t.setSecond(wordPos); spans.add(t); } if (isDelimiter) { lastDelimiterIndex = j; } } if (openParenIndex >= 0) { String s = val.substring(openParenIndex + 1, val.length()); // if (removeStar) { // s = starPattern.matcher(s).replaceAll(""); // } openSpans.push(new Triple<Integer, Integer, 
String>(wordPos, -1, s)); } } } if (openSpans.size() != 0) { throw new RuntimeException("Error extracting labelled spans for column " + fieldIndex + ": " + sent); } return spans; }
From source file:NER.NERAnnotation.java
public List<Entity> extractEntities(CoreMap sentence) { List<Entity> entityList = new ArrayList<Entity>(); CoreLabel prevEntity = null;/* w w w .j av a 2s . c om*/ String tag = ""; for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String entityTag = token.get(NamedEntityTagAnnotation.class); //System.out.println(entityTag); if (entityTag.compareToIgnoreCase("LOCATION") == 0 || entityTag.compareToIgnoreCase("DATE") == 0 || entityTag.compareToIgnoreCase("PERSON") == 0 || entityTag.compareToIgnoreCase("ORGANIZATION") == 0 || entityTag.compareToIgnoreCase("MISC") == 0) { if (prevEntity != null) { if (prevEntity.get(NamedEntityTagAnnotation.class).compareToIgnoreCase(entityTag) == 0 && prevEntity.endPosition() == token.beginPosition() - 1) { prevEntity.setEndPosition(token.endPosition()); prevEntity.set(TextAnnotation.class, prevEntity.get(TextAnnotation.class) + " " + token.get(TextAnnotation.class)); // tag=entityTag; // System.out.println(entityTag); } else { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); prevEntity = token; tag = entityTag; } } else { prevEntity = token; tag = entityTag; //System.out.println(entityTag); } } } if (prevEntity != null) { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); tag = ""; // System.out.println(tag); } return entityList; }