List of usage examples for the edu.stanford.nlp.trees.TypedDependency constructor TypedDependency
public TypedDependency(GrammaticalRelation reln, IndexedWord gov, IndexedWord dep)
From source file:ca.ualberta.exemplar.core.ParserMalt.java
License:Open Source License
@Override public List<CoreMap> parseText(String text) { List<CoreMap> sentences = null; try {//from w w w . j a v a 2 s .com Annotation document = new Annotation(text); pipeline.annotate(document); sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); String[] conllInput = sentenceToCoNLLInput(tokens); DependencyGraph graph = (DependencyGraph) maltParser.parse(conllInput); Result result = graphToCoNLL(graph); List<List<String>> conll = result.conll; int rootIndex = result.rootIndex; EnglishGrammaticalStructure gs = EnglishGrammaticalStructure.buildCoNNLXGrammaticStructure(conll); TreeGraphNode root = null; List<TypedDependency> deps = gs.typedDependenciesCCprocessed(); // Add root dependency and ner annotations int size = deps.size(); for (int i = 0; i < size; i++) { TypedDependency td = deps.get(i); if (td.gov().index() == rootIndex) { root = td.gov(); deps.add(new TypedDependency(GrammaticalRelation.ROOT, td.gov(), td.gov())); } { TreeGraphNode n = td.dep(); if (n.label().ner() == null) { n.label().setNER(tokens.get(n.index() - 1).ner()); n.label().setBeginPosition(tokens.get(n.index() - 1).beginPosition()); n.label().setEndPosition(tokens.get(n.index() - 1).endPosition()); n.label().setLemma(tokens.get(n.index() - 1).lemma()); } } { TreeGraphNode n = td.gov(); if (n.label().ner() == null) { n.label().setNER(tokens.get(n.index() - 1).ner()); n.label().setBeginPosition(tokens.get(n.index() - 1).beginPosition()); n.label().setEndPosition(tokens.get(n.index() - 1).endPosition()); n.label().setLemma(tokens.get(n.index() - 1).lemma()); } } } if (root == null) continue; List<TreeGraphNode> roots = new ArrayList<TreeGraphNode>(); roots.add(gs.root()); SemanticGraph sg = new SemanticGraph(deps, roots); sentence.set(CollapsedCCProcessedDependenciesAnnotation.class, sg); } } catch (Exception e) { e.printStackTrace(); } return sentences; }
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.DKPro2CoreNlp.java
License:Open Source License
public Annotation convert(JCas aSource, Annotation aTarget) { // Document annotation aTarget.set(CoreAnnotations.TextAnnotation.class, aSource.getDocumentText()); // Sentences//from w w w . ja v a 2 s. c om List<CoreMap> sentences = new ArrayList<>(); for (Sentence s : select(aSource, Sentence.class)) { if (StringUtils.isBlank(s.getCoveredText())) { continue; } String sentenceText = s.getCoveredText(); if (encoding != null && !"UTF-8".equals(encoding.name())) { sentenceText = new String(sentenceText.getBytes(StandardCharsets.UTF_8), encoding); } Annotation sentence = new Annotation(sentenceText); sentence.set(CharacterOffsetBeginAnnotation.class, s.getBegin()); sentence.set(CharacterOffsetEndAnnotation.class, s.getEnd()); sentence.set(SentenceIndexAnnotation.class, sentences.size()); // Tokens Map<Token, IndexedWord> idxTokens = new HashMap<>(); List<CoreLabel> tokens = new ArrayList<>(); for (Token t : selectCovered(Token.class, s)) { String tokenText = t.getCoveredText(); if (encoding != null && !"UTF-8".equals(encoding.name())) { tokenText = new String(tokenText.getBytes(StandardCharsets.UTF_8), encoding); } CoreLabel token = tokenFactory.makeToken(tokenText, t.getBegin(), t.getEnd() - t.getBegin()); // First add token so that tokens.size() returns a 1-based counting as required // by IndexAnnotation tokens.add(token); token.set(SentenceIndexAnnotation.class, sentences.size()); token.set(IndexAnnotation.class, tokens.size()); token.set(TokenKey.class, t); idxTokens.put(t, new IndexedWord(token)); // POS tags if (readPos && t.getPos() != null) { token.set(PartOfSpeechAnnotation.class, t.getPos().getPosValue()); } // Lemma if (t.getLemma() != null) { token.set(LemmaAnnotation.class, t.getLemma().getValue()); } // Stem if (t.getStem() != null) { token.set(StemAnnotation.class, t.getStem().getValue()); } // NamedEntity // TODO: only token-based NEs are supported, but not multi-token NEs // Supporting multi-token NEs via selectCovering would be very slow. 
To support // them, another approach would need to be implemented, e.g. via indexCovering. List<NamedEntity> nes = selectCovered(NamedEntity.class, t); if (nes.size() > 0) { token.set(NamedEntityTagAnnotation.class, nes.get(0).getValue()); } else { token.set(NamedEntityTagAnnotation.class, "O"); } } // Constituents for (ROOT r : selectCovered(ROOT.class, s)) { Tree tree = createStanfordTree(r, idxTokens); tree.indexSpans(); sentence.set(TreeAnnotation.class, tree); } // Dependencies List<TypedDependency> dependencies = new ArrayList<>(); for (Dependency d : selectCovered(Dependency.class, s)) { TypedDependency dep = new TypedDependency(GrammaticalRelation.valueOf(d.getDependencyType()), idxTokens.get(d.getGovernor()), idxTokens.get(d.getDependent())); if (DependencyFlavor.ENHANCED.equals(d.getFlavor())) { dep.setExtra(); } dependencies.add(dep); } sentence.set(EnhancedDependenciesAnnotation.class, new SemanticGraph(dependencies)); if (ptb3Escaping) { tokens = applyPtbEscaping(tokens, quoteBegin, quoteEnd); } sentence.set(TokensAnnotation.class, tokens); sentences.add(sentence); } aTarget.set(SentencesAnnotation.class, sentences); return aTarget; }
From source file:edu.jhu.agiga.StanfordAgigaSentence.java
License:Open Source License
public List<TypedDependency> getStanfordTypedDependencies(DependencyForm form) { List<TypedDependency> dependencies = new ArrayList<TypedDependency>(); if (this.nodes == null) nodes = getStanfordTreeGraphNodes(form); List<AgigaTypedDependency> agigaDeps = getAgigaDeps(form); for (AgigaTypedDependency agigaDep : agigaDeps) { // Add one, since the tokens are zero-indexed but the TreeGraphNodes are one-indexed TreeGraphNode gov = nodes.get(agigaDep.getGovIdx() + 1); TreeGraphNode dep = nodes.get(agigaDep.getDepIdx() + 1); // Create the typed dependency TypedDependency typedDep = new TypedDependency(GrammaticalRelation.valueOf(agigaDep.getType()), gov, dep);/*from w w w .ja v a2s. com*/ dependencies.add(typedDep); } return dependencies; }