List of usage examples for the edu.stanford.nlp.ling.IndexedWord constructor
public IndexedWord(CoreLabel w)
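The constructor wraps an existing CoreLabel so the token can be used wherever CoreNLP expects an IndexedWord, most commonly as a vertex in a SemanticGraph. Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed source files; the class and variable names are illustrative) that builds a CoreLabel by hand and wraps it:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;

public class IndexedWordConstructorSketch {
    public static void main(String[] args) {
        // Build a CoreLabel by hand; in the examples below it usually comes from a
        // sentence's TokensAnnotation produced by a CoreNLP pipeline instead.
        CoreLabel label = new CoreLabel();
        label.setWord("Stanford");
        label.setValue("Stanford");
        label.setIndex(1);

        // Wrap the CoreLabel so it can serve as a node in a dependency graph.
        IndexedWord word = new IndexedWord(label);
        System.out.println(word.word() + " / index " + word.index());
    }
}

The wrapped label's annotations (word form, tag, lemma, index, offsets) remain accessible through the IndexedWord's accessor methods, which is why the examples below construct IndexedWords directly from pipeline tokens.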
From source file:count_dep.Count_dep.java
public LinkedList<Event> GetEvents(SemanticGraph dependencies, CoreMap sentence) {
    LinkedList<Event> res = new LinkedList<>();
    LinkedList<IndexedWord> roots = new LinkedList<>();
    List<CoreLabel> words = sentence.get(TokensAnnotation.class);
    List<GrammaticalRelation> senserel = new LinkedList<>();
    senserel.add(GrammaticalRelation.valueOf("nsubj"));
    senserel.add(GrammaticalRelation.valueOf("dobj"));
    // collect nouns and verbs as candidate event triggers
    for (CoreLabel word : words) {
        if (word.tag().length() >= 2
                && ("VB".equals(word.tag().substring(0, 2)) || "NN".equals(word.tag().substring(0, 2)))) {
            IndexedWord iword = new IndexedWord(word);
            roots.add(iword);
        }
    }
    // build one Event per trigger, using its dependents (and selected siblings) as arguments
    for (IndexedWord word : roots) {
        Event e = new Event();
        e.trigger = word.word();
        try {
            Set<IndexedWord> children = dependencies.getChildren(word);
            children.stream().forEach((iw) -> {
                e.arguments.add(new EventArgument(iw.word(), ""));
            });
            if (dependencies.inDegree(word) > 0) {
                IndexedWord parent = dependencies.getParent(word);
                if (parent.tag().length() >= 2 && "VB".equals(parent.tag().substring(0, 2))) {
                    Set<IndexedWord> children1 = dependencies.getChildrenWithRelns(parent, senserel);
                    children1.remove(word);
                    children1.stream().forEach((iw) -> {
                        e.arguments.add(new EventArgument(iw.word(), ""));
                    });
                } else {
                    e.arguments.add(new EventArgument(dependencies.getParent(word).word(), ""));
                }
            }
        } catch (java.lang.IllegalArgumentException error) {
            continue;
        }
        res.add(e);
    }
    return res;
}
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.DKPro2CoreNlp.java
License: Open Source License
public Annotation convert(JCas aSource, Annotation aTarget) {
    // Document annotation
    aTarget.set(CoreAnnotations.TextAnnotation.class, aSource.getDocumentText());

    // Sentences
    List<CoreMap> sentences = new ArrayList<>();
    for (Sentence s : select(aSource, Sentence.class)) {
        if (StringUtils.isBlank(s.getCoveredText())) {
            continue;
        }

        String sentenceText = s.getCoveredText();
        if (encoding != null && !"UTF-8".equals(encoding.name())) {
            sentenceText = new String(sentenceText.getBytes(StandardCharsets.UTF_8), encoding);
        }

        Annotation sentence = new Annotation(sentenceText);
        sentence.set(CharacterOffsetBeginAnnotation.class, s.getBegin());
        sentence.set(CharacterOffsetEndAnnotation.class, s.getEnd());
        sentence.set(SentenceIndexAnnotation.class, sentences.size());

        // Tokens
        Map<Token, IndexedWord> idxTokens = new HashMap<>();
        List<CoreLabel> tokens = new ArrayList<>();
        for (Token t : selectCovered(Token.class, s)) {
            String tokenText = t.getCoveredText();
            if (encoding != null && !"UTF-8".equals(encoding.name())) {
                tokenText = new String(tokenText.getBytes(StandardCharsets.UTF_8), encoding);
            }
            CoreLabel token = tokenFactory.makeToken(tokenText, t.getBegin(), t.getEnd() - t.getBegin());
            // First add token so that tokens.size() returns a 1-based counting as required
            // by IndexAnnotation
            tokens.add(token);
            token.set(SentenceIndexAnnotation.class, sentences.size());
            token.set(IndexAnnotation.class, tokens.size());
            token.set(TokenKey.class, t);
            idxTokens.put(t, new IndexedWord(token));

            // POS tags
            if (readPos && t.getPos() != null) {
                token.set(PartOfSpeechAnnotation.class, t.getPos().getPosValue());
            }

            // Lemma
            if (t.getLemma() != null) {
                token.set(LemmaAnnotation.class, t.getLemma().getValue());
            }

            // Stem
            if (t.getStem() != null) {
                token.set(StemAnnotation.class, t.getStem().getValue());
            }

            // NamedEntity
            // TODO: only token-based NEs are supported, but not multi-token NEs
            // Supporting multi-token NEs via selectCovering would be very slow. To support
            // them, another approach would need to be implemented, e.g. via indexCovering.
            List<NamedEntity> nes = selectCovered(NamedEntity.class, t);
            if (nes.size() > 0) {
                token.set(NamedEntityTagAnnotation.class, nes.get(0).getValue());
            } else {
                token.set(NamedEntityTagAnnotation.class, "O");
            }
        }

        // Constituents
        for (ROOT r : selectCovered(ROOT.class, s)) {
            Tree tree = createStanfordTree(r, idxTokens);
            tree.indexSpans();
            sentence.set(TreeAnnotation.class, tree);
        }

        // Dependencies
        List<TypedDependency> dependencies = new ArrayList<>();
        for (Dependency d : selectCovered(Dependency.class, s)) {
            TypedDependency dep = new TypedDependency(GrammaticalRelation.valueOf(d.getDependencyType()),
                    idxTokens.get(d.getGovernor()), idxTokens.get(d.getDependent()));
            if (DependencyFlavor.ENHANCED.equals(d.getFlavor())) {
                dep.setExtra();
            }
            dependencies.add(dep);
        }
        sentence.set(EnhancedDependenciesAnnotation.class, new SemanticGraph(dependencies));

        if (ptb3Escaping) {
            tokens = applyPtbEscaping(tokens, quoteBegin, quoteEnd);
        }

        sentence.set(TokensAnnotation.class, tokens);
        sentences.add(sentence);
    }
    aTarget.set(SentencesAnnotation.class, sentences);

    return aTarget;
}
From source file:main.java.parsers.StanfordParser.java
/**
 * Parses a given input text document using the Stanford CoreNLP parser.
 *
 * @param document
 * @throws java.io.UnsupportedEncodingException
 * @throws java.io.IOException
 * @throws java.lang.InterruptedException
 */
public static void parse(Doc document) throws UnsupportedEncodingException, IOException, InterruptedException {
    // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
    Annotation annotation = new Annotation(new String(document.text.getBytes("UTF-8"), "UTF-8"));
    // run all the selected Annotators on this text
    pipeline.annotate(annotation);

    // An Annotation is a Map and you can get and use the various analyses individually.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    // return if the annotation is empty.
    if (sentences == null || sentences.isEmpty())
        return;

    // map linking token offsets with their token annotations from the Stanford tool.
    for (CoreMap sentence : sentences) {
        String sentenceStr = "";
        int sentenceNum = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);

        Map<Integer, Integer> tokenNumStartOffset = document.sentenceTokenNumStartOffset.get(sentenceNum);
        if (tokenNumStartOffset == null)
            document.sentenceTokenNumStartOffset.put(sentenceNum, tokenNumStartOffset = new HashMap<>());

        Map<Integer, List<String>> startOffsetSRLRoles = new TreeMap<>();

        // extracting tokenized information from the Stanford parser output.
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            sentenceStr += token.value() + " ";
            document.startOffsetIndexedWord.put(token.beginPosition(), new IndexedWord(token));
            tokenNumStartOffset.put(token.index(), token.beginPosition());
            startOffsetSRLRoles.put(token.beginPosition(), null);
        }

        // write the tokenized sentence to an output file
        FileOutputStream output = new FileOutputStream(Main.RESOURCES_DIR + "\\senna\\log.txt");
        output.write(sentenceStr.getBytes());

        // the semantic role labels for the sentence are obtained by applying SENNA
        startOffsetSRLRoles = SENNASrl.getSRLRoles(startOffsetSRLRoles);
        // set the SRL tags
        document.startOffsetSRLRoles.putAll(startOffsetSRLRoles);

        // parse tree of the sentence
        String stanfordParseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString();
        ParseTree parseTree = new ParseTree(stanfordParseTree);
        parseTree.convertParseTree();
        document.setSentenceParseTree(sentenceNum, parseTree);

        // dependency graph of the sentence
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        document.setSentenceDependencyGraph(sentenceNum, graph);
    }
}
From source file:opendial.bn.values.RelationalVal.java
License: Open Source License
public void addGraph(SemanticGraph newGraph) {
    int oldGraphSize = graph.size();
    // copy the vertices of the incoming graph, re-indexing them after the existing vertices
    for (IndexedWord iw : newGraph.vertexListSorted()) {
        IndexedWord copy = new IndexedWord(iw);
        copy.setIndex(graph.size());
        graph.addVertex(copy);
    }
    // copy the edges, shifting governor and dependent indices by the old graph size
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        int dep = edge.getDependent().index() + oldGraphSize;
        int gov = edge.getGovernor().index() + oldGraphSize;
        GrammaticalRelation rel = edge.getRelation();
        addEdge(gov, dep, rel.getLongName());
    }
    cachedHashCode = 0;
}
From source file:opendial.bn.values.RelationalVal.java
License: Open Source License
public int addNode(String value) {
    CoreLabel label = new CoreLabel();
    label.setWord(value);
    label.setValue(value);
    IndexedWord fword = new IndexedWord(label);
    fword.setIndex(graph.size());
    graph.addVertex(fword);
    cachedHashCode = 0;
    return fword.index();
}
From source file:sleventextraction.SLEntity.java
public SLEntity(AceMention m, CoreMap senCM, SemanticGraph senSG) {
    this();
    // copy basic attributes from the ACE mention
    isArg = m.isArg;
    argProb = m.argProb;
    role = m.role;
    if (m.getParent() instanceof AceJet.AceEntity) {
        this.entitytype = ((AceEntity) m.getParent()).type;
        this.entitysubtype = ((AceEntity) m.getParent()).subtype;
    } else if (m.getParent() instanceof AceJet.AceTimex) {
        this.entitytype = "";
        this.entitysubtype = "";
    } else if (m.getParent() instanceof AceJet.AceValue) {
        this.entitytype = ((AceValue) m.getParent()).type;
        this.entitysubtype = ((AceValue) m.getParent()).subtype;
    } else {
        this.entitytype = "";
        this.entitysubtype = "";
    }
    this.mentiontype = m.getType();
    System.arraycopy(m.roleProb, 0, roleProb, 0, m.roleProb.length);
    ground = m.ground;
    span = senCM;
    SemanticGraph totaldep = span.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
    // normalise a handful of truncated or garbled mention strings
    this.content = m.text.trim();
    if (m.text.charAt(0) == '\"') {
        this.content = m.text.substring(1).trim();
    }
    if ("s\nb".equals(this.content)) {
        this.content = "his brother";
    } else if (" f".equals(this.content)) {
        this.content = "foreign";
    } else if ("-l".equals(this.content)) {
        this.content = "US-led";
    } else if ("s a".equals(this.content)) {
        if (span.toString().contains("Arafat's administration")) {
            this.content = "Arafat's administration";
        } else if (span.toString().contains("bus attack")) {
            this.content = "bus attack";
        }
    } else if ("33-month".equals(this.content)) {
        this.content = "33-month-old";
    } else if ("U.S".equals(this.content)) {
        this.content = "U.S.";
    } else if ("four-day".equals(this.content)) {
        this.content = "four-day-old";
    } else if ("U.N".equals(this.content)) {
        this.content = "U.N.";
    } else if ("33-year".equals(this.content)) {
        this.content = "33-year-old";
    }
    // re-parse the mention text and align its tokens with the sentence tokens to find the head word
    Annotation document = ParseSentence(this.content);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap cm = sentences.get(0);
    int pathlength = -1, imin = 1000;
    for (int i = 0; i < senCM.get(TokensAnnotation.class).size(); i++) {
        IndexedWord debug = new IndexedWord(senCM.get(TokensAnnotation.class).get(i));
        boolean canmatch = true;
        for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
            IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
            IndexedWord shortiw = new IndexedWord(cm.get(TokensAnnotation.class).get(j));
            if (!iw.word().equals(shortiw.word())) {
                if (SLEventExtraction.overlap(iw.word(), shortiw.word()) <= 0
                        || Double.isNaN(SLEventExtraction.overlap(iw.word(), shortiw.word()))) {
                    canmatch = false;
                    break;
                }
            }
        }
        if (canmatch) {
            // the mention token closest to the dependency root becomes the head
            for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
                IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
                this.ContentIws.add(iw);
                try {
                    pathlength = totaldep.getPathToRoot(iw).size();
                } catch (java.lang.IllegalArgumentException err) {
                    pathlength = 100;
                }
                if (imin > pathlength) {
                    imin = pathlength;
                    this.head = iw;
                }
            }
            break;
        }
    }
    if (this.head == null) {
        return;
    }
    // the head's parent in the dependency graph becomes the predicate
    this.predicate = totaldep.getParent(this.head);
    if (this.predicate == null) {
        this.predicate = this.head;
    } else {
        IndexedWord curr = head;
        dep = totaldep.getEdge(predicate, curr).getRelation().getShortName();
        if (totaldep.getEdge(predicate, curr).getRelation().getSpecific() != null) {
            dep += "_" + totaldep.getEdge(predicate, curr).getRelation().getSpecific();
        }
    }
}
From source file:sleventextraction.SLEventTypeClassifier.java
public static IndexedWord GetCorrespondingIndexedWord(String anchor, CoreMap cm) {
    // normalise the anchor string and split it into candidate tokens
    anchor = anchor.replaceAll("[.|,|\n|\"]", " ");
    String[] split = anchor.split("[ |']");
    IndexedWord iw = null;
    // scan the anchor tokens from right to left, looking for a matching sentence token
    for (int i = split.length - 1; i >= 0; i--) {
        for (CoreLabel token : cm.get(TokensAnnotation.class)) {
            iw = new IndexedWord(token);
            if (split[i].contains(iw.word()) || iw.word().contains(split[i])) {
                if (Math.abs(split[i].length() - iw.word().length()) <= 3) {
                    return iw;
                } else if (iw.word().contains("-")) {
                    String[] split1 = iw.word().split("-");
                    boolean match = false;
                    for (int j = 0; j < split1.length; j++) {
                        if (split1[j].equals(split[i])) {
                            match = true;
                        }
                    }
                    if (match) {
                        return iw;
                    }
                }
            }
            if (split[i].contains(iw.lemma())) {
                if (Math.abs(split[i].length() - iw.lemma().length()) <= 2) {
                    return iw;
                }
            }
        }
    }
    return null;
}
From source file:sleventextraction.SLEventTypeClassifier.java
private LinkedList<String> GetCandidateTriggers(Annotation parsedsen) {
    LinkedList<String> res = new LinkedList<>();
    List<CoreMap> sentences = parsedsen.get(CoreAnnotations.SentencesAnnotation.class);
    assert sentences.size() == 1;
    CoreMap cm = sentences.get(0);
    // candidate triggers are the nouns and verbs of the (single) parsed sentence
    for (CoreLabel token : cm.get(TokensAnnotation.class)) {
        IndexedWord iw = new IndexedWord(token);
        if (iw.tag().contains("NN") || iw.tag().contains("VB")) {
            res.add(iw.word());
        }
    }
    return res;
}
From source file:slvectormodel.SLVectorModel.java
private LinkedList<Double> LexicalChain(SLEntity e) {
    LinkedList<Double> res = new LinkedList<>();
    res = SLMath.Vector_0(SLEventExtraction.dim);
    CoreMap sentence = e.span;
    List<CoreLabel> cmwords = sentence.get(TokensAnnotation.class);
    List<IndexedWord> iwwords = new LinkedList<>();
    // wrap the sentence tokens and locate the position of the entity head
    int pos = -1;
    for (int i = 0; i < cmwords.size(); i++) {
        IndexedWord iw = new IndexedWord(cmwords.get(i));
        iwwords.add(iw);
        if (iw == e.head) {
            pos = i;
        }
    }
    // sum word vectors of verbs/adjectives in a window to the left of the head
    int count = 0;
    for (int i = pos - 1; i >= 0; i--) {
        if (iwwords.get(i).tag().contains("VB") || iwwords.get(i).tag().contains("JJ")) {
            count++;
            if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).word()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word().toLowerCase())) {
                res = SLMath.Vector_add(res,
                        SLEventExtraction.word2vec.get(iwwords.get(i).word().toLowerCase()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).lemma())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).lemma()));
            }
        }
        if (count >= halfwindow) {
            break;
        }
    }
    // sum word vectors of verbs/adjectives in a window to the right of the head
    count = 0;
    for (int i = pos + 1; i < iwwords.size(); i++) {
        if (iwwords.get(i).tag().contains("VB") || iwwords.get(i).tag().contains("JJ")) {
            count++;
            if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).word()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word().toLowerCase())) {
                res = SLMath.Vector_add(res,
                        SLEventExtraction.word2vec.get(iwwords.get(i).word().toLowerCase()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).lemma())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).lemma()));
            }
        }
        if (count >= halfwindow) {
            break;
        }
    }
    return res;
}