List of usage examples for the method edu.stanford.nlp.ling.CoreLabel.setEndPosition(int)
@Override public void setEndPosition(int endPos)
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
private CoreLabel getCoreLabel(int labelIndex) { if (originalCoreLabels[labelIndex] != null) { CoreLabel terminalLabel = originalCoreLabels[labelIndex]; if (terminalLabel.value() == null && terminalLabel.word() != null) { terminalLabel.setValue(terminalLabel.word()); }//www . j a va 2 s . c o m return terminalLabel; } String wordStr = wordIndex.get(words[labelIndex]); CoreLabel terminalLabel = new CoreLabel(); terminalLabel.setValue(wordStr); terminalLabel.setWord(wordStr); terminalLabel.setBeginPosition(beginOffsets[labelIndex]); terminalLabel.setEndPosition(endOffsets[labelIndex]); if (originalTags[labelIndex] != null) { terminalLabel.setTag(originalTags[labelIndex].tag()); } return terminalLabel; }
From source file:de.l3s.workive.analysis.ner.GermanNER.java
public List<Entity> extractEntities(CoreMap sentence) { List<Entity> entityList = new ArrayList<Entity>(); CoreLabel prevEntity = null; String tag = ""; for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String entityTag = token.get(NamedEntityTagAnnotation.class); //System.out.println(entityTag); if (entityTag.compareToIgnoreCase("I-ORG") == 0 || entityTag.compareToIgnoreCase("I-PER") == 0 || entityTag.compareToIgnoreCase("I-LOC") == 0 || entityTag.compareToIgnoreCase("MISC") == 0) { if (prevEntity != null) { if (prevEntity.get(NamedEntityTagAnnotation.class).compareToIgnoreCase(entityTag) == 0 && prevEntity.endPosition() == token.beginPosition() - 1) { prevEntity.setEndPosition(token.endPosition()); prevEntity.set(TextAnnotation.class, prevEntity.get(TextAnnotation.class) + " " + token.get(TextAnnotation.class)); } else { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); prevEntity = token;// w ww. ja v a 2 s.com tag = entityTag; } } else { prevEntity = token; tag = entityTag; } } } if (prevEntity != null) { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); tag = ""; } return entityList; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.CoreNlpUtils.java
License:Open Source License
public static CoreLabel tokenToWord(Token aToken) { CoreLabel t = new CoreLabel(); t.setOriginalText(aToken.getCoveredText()); t.setWord(aToken.getCoveredText());//from w ww . j av a2 s.c o m t.setBeginPosition(aToken.getBegin()); t.setEndPosition(aToken.getEnd()); if (aToken.getLemma() != null) { t.setLemma(aToken.getLemma().getValue()); } if (aToken.getPos() != null) { t.setTag(aToken.getPos().getPosValue()); } return t; }
From source file:edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java
public static CoreMap getStanfordSentence(DocumentNLP document, int sentIdx) { List<String> words = document.getSentenceTokenStrs(sentIdx); List<PoSTag> posTags = document.getSentencePoSTags(sentIdx); List<CoreLabel> tokenList = new ArrayList<CoreLabel>(); for (int i = 0; i < words.size(); i++) { /*Re-create Stanford tokens*/ CoreLabel token = new CoreLabel(); token.setWord(words.get(i));// www . jav a 2 s .c om token.setTag(posTags.get(i).toString()); token.setNER("O"); token.setDocID(document.getName()); token.setSentIndex(sentIdx); token.setBeginPosition(document.getToken(sentIdx, i).getCharSpanStart()); token.setEndPosition(document.getToken(sentIdx, i).getCharSpanEnd()); //System.out.println(token.word()+" "+token.beginPosition()+" "+token.endPosition()); tokenList.add(token); } //Add NER labels for sentence List<Pair<TokenSpan, String>> ners = document.getNer(sentIdx); for (Pair<TokenSpan, String> p : ners) { for (int k = p.getFirst().getStartTokenIndex(); k < p.getFirst().getEndTokenIndex(); k++) { tokenList.get(k).setNER(p.getSecond()); } } //Convert to Stanford Sentence CoreMap sentence = new ArrayCoreMap(); sentence.set(TokensAnnotation.class, tokenList); sentence.set(CharacterOffsetBeginAnnotation.class, tokenList.get(0).beginPosition()); sentence.set(CharacterOffsetEndAnnotation.class, tokenList.get(words.size() - 1).endPosition()); return sentence; }
From source file:NER.NERAnnotation.java
public List<Entity> extractEntities(CoreMap sentence) { List<Entity> entityList = new ArrayList<Entity>(); CoreLabel prevEntity = null; String tag = ""; for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String entityTag = token.get(NamedEntityTagAnnotation.class); //System.out.println(entityTag); if (entityTag.compareToIgnoreCase("LOCATION") == 0 || entityTag.compareToIgnoreCase("DATE") == 0 || entityTag.compareToIgnoreCase("PERSON") == 0 || entityTag.compareToIgnoreCase("ORGANIZATION") == 0 || entityTag.compareToIgnoreCase("MISC") == 0) { if (prevEntity != null) { if (prevEntity.get(NamedEntityTagAnnotation.class).compareToIgnoreCase(entityTag) == 0 && prevEntity.endPosition() == token.beginPosition() - 1) { prevEntity.setEndPosition(token.endPosition()); prevEntity.set(TextAnnotation.class, prevEntity.get(TextAnnotation.class) + " " + token.get(TextAnnotation.class)); // tag=entityTag; // System.out.println(entityTag); } else { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); prevEntity = token;/*from ww w. j a v a 2s.c o m*/ tag = entityTag; } } else { prevEntity = token; tag = entityTag; //System.out.println(entityTag); } } } if (prevEntity != null) { Triple<String, Integer, Integer> triple = new Triple<String, Integer, Integer>( prevEntity.get(TextAnnotation.class), prevEntity.beginPosition(), prevEntity.endPosition()); entityList.add(new Entity(triple, tag)); tag = ""; // System.out.println(tag); } return entityList; }