List of usage examples for edu.stanford.nlp.ling IndexedWord index
@Override
public int index()
From source file:ca.ualberta.exemplar.core.ArgumentExtraction.java
License:Open Source License
private Argument getEntityFromHead(IndexedWord head, CoreMap sentence, SemanticGraph dependencies, String argumentType) {/*from w w w . j a va 2 s.co m*/ int startIndex = head.index() - 1; //Changing from starting at 1 to starting at 0 int endIndex = head.index() - 1; List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); CoreLabel token = tokens.get(startIndex); String ne = token.get(NamedEntityTagAnnotation.class); StringBuilder builder = new StringBuilder(); builder.append(token.get(TextAnnotation.class)); int startOffset = token.beginPosition(); int endOffset = token.endPosition(); // Look for first token of the entity. for (int index = startIndex - 1; index >= 0; index--) { token = tokens.get(index); String word = token.get(TextAnnotation.class); if (!ne.equals(token.get(NamedEntityTagAnnotation.class))) break; startIndex--; builder.insert(0, word + " "); startOffset = token.beginPosition(); } for (int index = endIndex + 1; index < tokens.size(); index++) { token = tokens.get(index); String word = token.get(TextAnnotation.class); if (!ne.equals(token.get(NamedEntityTagAnnotation.class))) break; endIndex++; builder.append(" " + word); endOffset = token.endPosition(); } String entityName = builder.toString(); String entityType = normalizeEntityType(ne); String entityId = entityName + "#" + entityType; Argument argument = new Argument(argumentType, entityId, entityName, entityType, startIndex, endIndex, startOffset, endOffset); return argument; }
From source file:ca.ualberta.exemplar.core.RelationExtraction.java
License:Open Source License
private static void sortWordsByIndex(List<IndexedWord> words) { Collections.sort(words, new Comparator<IndexedWord>() { @Override// w ww . j a v a 2 s . com public int compare(IndexedWord a0, IndexedWord a1) { return a0.index() - a1.index(); } }); }
From source file:ca.ualberta.exemplar.core.RelationExtraction.java
License:Open Source License
private static void addModifiers(List<IndexedWord> words, IndexedWord word, SemanticGraph dependencies) { List<IndexedWord> adjs = dependencies.getChildrenWithReln(word, GrammaticalRelation.valueOf("amod")); List<IndexedWord> nns = dependencies.getChildrenWithReln(word, GrammaticalRelation.valueOf("nn")); List<IndexedWord> negs = dependencies.getChildrenWithReln(word, GrammaticalRelation.valueOf("neg")); List<IndexedWord> pvts = dependencies.getChildrenWithReln(word, GrammaticalRelation.valueOf("pvt")); // phrasal verb particle -- shut down List<IndexedWord> newWords = new ArrayList<IndexedWord>(); if (adjs != null) newWords.addAll(adjs);/* w w w. java 2 s . co m*/ if (nns != null) newWords.addAll(nns); if (negs != null) newWords.addAll(negs); if (pvts != null) newWords.addAll(pvts); for (IndexedWord newWord : newWords) { if (Math.abs(word.index() - newWord.index()) > 5) { // If a modifier is too far way from trigger (> 5 tokens), ignore this modifier since it is probably a mistake continue; } if (!newWord.ner().equals("PERSON") && !newWord.ner().equals("ORGANIZATION") && !newWord.ner().equals("LOCATION") && !newWord.ner().equals("MISC")) { words.add(newWord); } } }
From source file:edu.anu.spice.SpiceParser.java
License:Open Source License
/**
 * Looks for a numeric (or, failing that, a quantificational) modifier on a word and records it
 * as a tuple of the word and that modifier.
 *
 * <p>A numeric modifier is only recorded when its index is smaller than the word's index. The
 * quantificational modifier is only considered when the word has no numeric modifier at all.
 *
 * @param tuples collector that receives the (word, modifier) tuple
 * @param sg     dependency graph containing {@code word}
 * @param word   the word to inspect
 */
protected void checkForNumericAttribute(ProposedTuples tuples, SemanticGraph sg, IndexedWord word) {
    if (sg.hasChildWithReln(word, UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER)) {
        IndexedWord numericModifier = sg.getChildWithReln(word,
                UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER);
        // Only accept modifiers that precede the word; this prevents things like "number 5".
        boolean modifierPrecedesWord = numericModifier.index() < word.index();
        if (modifierPrecedesWord) {
            tuples.addTuple(word, numericModifier);
        }
        return;
    }

    if (!sg.hasChildWithReln(word, SemanticGraphEnhancer.QMOD_RELATION)) {
        return;
    }
    IndexedWord quantModifier = sg.getChildWithReln(word, SemanticGraphEnhancer.QMOD_RELATION);
    tuples.addTuple(word, quantModifier);
}
From source file:edu.jhu.hlt.concrete.stanford.PreNERCoreMapWrapper.java
License:Open Source License
private List<Dependency> makeDependencies(SemanticGraph graph) { List<Dependency> depList = new ArrayList<Dependency>(); for (IndexedWord root : graph.getRoots()) { // this mimics CoreNLP's handling String rel = GrammaticalRelation.ROOT.getLongName().replaceAll("\\s+", ""); int dep = root.index() - 1; Dependency depend = DependencyFactory.create(dep, rel); depList.add(depend);//from w w w . j av a2 s. com } for (SemanticGraphEdge edge : graph.edgeListSorted()) { String rel = edge.getRelation().toString().replaceAll("\\s+", ""); int gov = edge.getSource().index() - 1; int dep = edge.getTarget().index() - 1; Dependency depend = DependencyFactory.create(dep, rel, gov); depList.add(depend); } return depList; }
From source file:eu.ubipol.opinionmining.nlp_engine.Sentence.java
License:Open Source License
/**
 * Builds the token tree of a sentence from its dependency graph.
 *
 * <p>The first root of the graph becomes {@code sentenceRoot}; its descendants are attached
 * recursively through {@code addChildTokens}. Afterwards scores are transferred on the root and,
 * if the root itself is a keyword, its own score is added as an aspect score.
 *
 * <p>{@code indexStart} is only read here. Java passes it by value, so the caller's variable is
 * never changed by this constructor; the previous trailing {@code indexStart += dependencies.size()}
 * was a dead store with no observable effect and has been removed.
 * NOTE(review): if the caller needs the offset advanced past this sentence, it presumably has to
 * add {@code dependencies.size()} itself -- confirm against the call site.
 *
 * @param dependencies  dependency graph of the sentence
 * @param indexStart    offset added to every word index when creating tokens
 * @param adp           database adapter handed to every created token
 * @param beginPosition position value handed to every created token
 */
protected Sentence(SemanticGraph dependencies, int indexStart, DatabaseAdapter adp, int beginPosition) {
    IndexedWord rootWord = dependencies.getFirstRoot();
    sentenceRoot = new Token(rootWord.originalText(), rootWord.lemma(), rootWord.tag(), null, null,
            rootWord.index() + indexStart, rootWord.beginPosition(), rootWord.endPosition(), adp,
            beginPosition);
    addChildTokens(sentenceRoot, rootWord, dependencies, indexStart, adp, beginPosition);
    sentenceRoot.transferScores();
    if (sentenceRoot.isAKeyword()) {
        sentenceRoot.addAspectScore(sentenceRoot.getScore(), sentenceRoot.getWeight(),
                sentenceRoot.getAspect());
    }
}
From source file:eu.ubipol.opinionmining.nlp_engine.Sentence.java
License:Open Source License
/**
 * Recursively mirrors the dependency subtree below {@code currentRoot} as child tokens of
 * {@code rootToken}.
 *
 * <p>For every child of {@code currentRoot} in the graph a {@link Token} is created, attached to
 * {@code rootToken}, and then processed in the same way (depth first).
 * NOTE(review): the recursion has no visited-set, so it presumably relies on the graph being
 * acyclic -- confirm for the dependency representation in use.
 *
 * @param rootToken     token that receives the newly created children
 * @param currentRoot   graph node corresponding to {@code rootToken}
 * @param dependencies  dependency graph of the sentence
 * @param indexStart    offset added to every word index
 * @param adp           database adapter handed to every created token
 * @param beginPosition position value handed to every created token
 */
private void addChildTokens(Token rootToken, IndexedWord currentRoot, SemanticGraph dependencies,
        int indexStart, DatabaseAdapter adp, int beginPosition) {
    for (IndexedWord childWord : dependencies.getChildren(currentRoot)) {
        String text = childWord.originalText();
        String lemma = childWord.lemma();
        String tag = childWord.tag();
        // String form of the edge that links the parent node to this child.
        String edgeDescription = dependencies.getEdge(currentRoot, childWord).toString();
        int shiftedIndex = childWord.index() + indexStart;

        Token created = new Token(text, lemma, tag, rootToken, edgeDescription, shiftedIndex,
                childWord.beginPosition(), childWord.endPosition(), adp, beginPosition);
        rootToken.addChildToken(created);

        // Descend into the child's own dependents.
        addChildTokens(created, childWord, dependencies, indexStart, adp, beginPosition);
    }
}
From source file:main.java.spatialrelex.markup.SpatialElement.java
/**
 * Fills in token-level features of a spatial element from the document's offset-indexed maps.
 *
 * <p>The word starting at {@code se.start} initialises the lemma text and the start/end token
 * indices. Every further offset in {@code (se.start, se.end)} at which a word starts then
 * extends the lemma text and end token, contributes lexical-resource features depending on
 * whether its tag contains {@code "NN"} or {@code "VB"}, and contributes any SRL roles not yet
 * present in {@code se.srls}.
 *
 * @param document document providing {@code startOffsetIndexedWord} and {@code startOffsetSRLRoles}
 * @param se       the spatial element to populate
 * @return the populated element (the value most recently returned by the WordNet helper, if it
 *         was invoked; otherwise {@code se} itself)
 */
public static SpatialElement setSpatialElementFeatures(Doc document, SpatialElement se) {
    IndexedWord current = document.startOffsetIndexedWord.get(se.start);
    se.lemmaText = current.lemma();
    se.startToken = current.index();
    se.endToken = current.index();

    for (int offset = se.start + 1; offset < se.end; offset++) {
        // Skip character offsets at which no word starts.
        if (!document.startOffsetIndexedWord.containsKey(offset)) {
            continue;
        }

        current = document.startOffsetIndexedWord.get(offset);
        se.endToken = current.index();
        se.lemmaText += " " + current.lemma();

        if (current.tag().contains("NN")) {
            se.generalInquirerCategories = GeneralInquirer.getGeneralInquirerCategories(
                    se.generalInquirerCategories, current.value().toLowerCase());
            se = WordNet.setWordNetSynsetsAndHypernyms(se, current.tag(), "NN");
        } else if (current.tag().contains("VB")) {
            se.verbNetClasses = VerbNet.getVerbNetClasses(se.verbNetClasses,
                    current.value().toLowerCase());
            se = WordNet.setWordNetSynsetsAndHypernyms(se, current.tag(), "VB");
        }

        List<String> roles = document.startOffsetSRLRoles.get(offset);
        if (roles == null) {
            continue;
        }
        // Add each role once.
        for (String role : roles) {
            if (!se.srls.contains(role)) {
                se.srls.add(role);
            }
        }
    }

    return se;
}
From source file:opendial.bn.values.RelationalVal.java
License:Open Source License
/**
 * Adds a new vertex with the given value to the graph.
 *
 * <p>The vertex is labelled with {@code value} as both word and value, and its index is set to
 * the graph's size before insertion.
 *
 * @param value the word/value of the new node
 * @return the index of the newly added node
 */
public int addNode(String value) {
    CoreLabel nodeLabel = new CoreLabel();
    nodeLabel.setWord(value);
    nodeLabel.setValue(value);

    IndexedWord node = new IndexedWord(nodeLabel);
    int nextIndex = graph.size();
    node.setIndex(nextIndex);
    graph.addVertex(node);

    // The graph changed, so reset the cached hash code
    // (presumably 0 marks it as "not yet computed" -- confirm against hashCode()).
    cachedHashCode = 0;

    return node.index();
}
From source file:org.sam_agent.csparser.ContinuousParser.java
License:Open Source License
/**
 * Renders the given root words as a bracketed, comma-separated list of quoted
 * {@code "value-index"} strings, e.g. {@code ["ate-2","ran-5"]}.
 *
 * <p>Each word's value is passed through {@code esc} before being quoted. The index is
 * appended by plain string concatenation rather than {@code String.format("%d")}, because
 * {@code %d} honours the default locale and may emit non-ASCII digits, which would make this
 * machine-readable output vary between environments.
 *
 * @param roots the root words to render
 * @return the rendered list; {@code "[]"} when {@code roots} is empty
 */
public String stringify(Collection<IndexedWord> roots) {
    List<String> rootTokens = new ArrayList<String>();
    for (IndexedWord root : roots) {
        rootTokens.add("\"" + esc(root.value()) + "-" + root.index() + "\"");
    }
    return "[" + String.join(",", rootTokens) + "]";
}