List of usage examples for edu.stanford.nlp.trees Tree valueOf
public static Tree valueOf(String str)
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
private List<String> decompose(String documentText) { List<Tree> trees = new ArrayList<Tree>(); for (String sentence : AnalysisUtilities.getSentences(documentText)) { trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse); }//from w w w .ja va 2s .co m List<String> result = new ArrayList<String>(); for (Tree t : trees) { TregexPattern p = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) "); TregexMatcher m = p.matcher(t); while (m.find()) { Tree np = m.getNode("np"); Tree vp = m.getNode("vp"); Tree np2 = np.deepCopy(); TregexPattern p2 = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)"); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); ps.add(Tsurgeon.parseOperation("prune sbarq")); ps.add(Tsurgeon.parseOperation("prune c1")); ps.add(Tsurgeon.parseOperation("prune c2")); Tsurgeon.processPattern(p2, Tsurgeon.collectOperations(ps), np2); np = np2; Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))"); result.add(AnalysisUtilities.orginialSentence(newTree.yield())); } } return result; }
From source file:edu.nus.comp.nlp.tool.anaphoraresolution.AnnotatedText.java
License:Open Source License
/**
 * Builds the annotated-text model: one parse-tree node per sentence attached
 * under a shared root, plus the NP / pronoun lists derived from that tree.
 */
private AnnotatedText(List<String> sentences) {
    rootNode = new DefaultMutableTreeNode();
    int sentenceIndex = 0;
    for (String sentence : sentences) {
        // Parse the bracketed sentence string and wrap it as a mutable tree node.
        TreeAdapter adapter = new TreeAdapter(Tree.valueOf(sentence), sentenceIndex);
        rootNode.add(adapter.getDefaultMutableTreeNode());
        sentenceIndex++;
    }
    // rootNode = buildParseTree(sents);
    NPExtractor extractor = new NPExtractor(rootNode);
    NPList = extractor.getNPList();
    PRPList = extractor.getPRPList();
    identifyPleonasticPronoun(rootNode);
    SNPList = buildSNPList(NPList);
}
From source file:elkfed.coref.mentions.AbstractMentionFactory.java
License:Apache License
/**
 * Extracts all mentions of a document: reads the per-sentence parse trees,
 * wraps them as Utterances, then turns every markable that passes
 * keepMarkable into a Mention, locating the sentence/parse it belongs to.
 *
 * @param doc the MMAX document to extract mentions from
 * @return the mentions in text order (in perfect-boundaries mode, only those
 *         that also exist in the coref key)
 */
public List<Mention> extractMentions(MiniDiscourse doc) throws IOException {
    _currentText = CorefDocuments.getInstance().getText(doc);
    ArrayList<Tree> parseTrees = new ArrayList<Tree>();
    ArrayList<Integer> parseStart = new ArrayList<Integer>();
    ArrayList<Integer> parseEnd = new ArrayList<Integer>();
    ArrayList<Utterance> utterances = new ArrayList<Utterance>();
    // extract parses and create utterances for each sentence
    for (Markable parseMarkable : DiscourseUtils.getMarkables(doc, DEFAULT_PARSE_LEVEL)) {
        // Parse trees are stored as bracketed strings in a markable attribute.
        Tree currParseTree = postProcess(
                Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)));
        Utterance currUtt = new Utterance(currParseTree);
        parseTrees.add(currParseTree);
        parseStart.add(parseMarkable.getLeftmostDiscoursePosition());
        parseEnd.add(parseMarkable.getRightmostDiscoursePosition());
        currUtt.setLeftBoundary(parseMarkable.getLeftmostDiscoursePosition());
        currUtt.setRightBoundary(parseMarkable.getRightmostDiscoursePosition());
        utterances.add(currUtt);
    }
    ArrayList<Mention> inTextMarkables = new ArrayList<Mention>();
    for (Markable m_markable : DiscourseUtils.getMarkables(doc, DEFAULT_MARKABLE_LEVEL)) {
        if (keepMarkable(m_markable)) {
            // Non-null iff this markable is part of a coreference chain in the key.
            Markable m_coref = CorefDocuments.getInstance().markableIsaCorefElement(doc, m_markable);
            Mention mention = new Mention(m_markable, doc);
            if (m_coref != null) {
                mention.setSetID(m_coref.getAttributeValue(COREF_SET_ATTRIBUTE));
                reportMapping(m_markable, m_coref);
            } else {
                reportMapping(m_markable, null);
            }
            // find the parse tree that this markable is in
            int startPos = m_markable.getLeftmostDiscoursePosition();
            int endPos = m_markable.getRightmostDiscoursePosition();
            int endPosP = m_markable.getRightmostDiscoursePosition();
            mention.setStartWord(startPos);
            mention.setEndWord(endPos);
            // If a minimal span is annotated, narrow start/end to it (endPosP
            // keeps the full right boundary for the parse offset below).
            if (m_markable.getAttributeValue("min_ids") != null) {
                String[] spans = MarkableHelper.parseRanges(m_markable.getAttributeValue("min_ids"));
                startPos = doc.DiscoursePositionFromDiscourseElementID(spans[0]);
                endPos = doc.DiscoursePositionFromDiscourseElementID(spans[spans.length - 1]);
            }
            // NOTE(review): boxed Boolean where primitive boolean would do.
            Boolean found = false;
            for (int i = 0; i < parseTrees.size() && !found; i++) {
                final int sentStart = parseStart.get(i);
                final int sentEnd = parseEnd.get(i);
                /* if (startPos >= sentStart && endPos <= sentEnd) {
                 * gold/carafe markables may disrespect sentence boundaries :((
                 * they should still receive at least some sentence information though */
                if (startPos >= sentStart && startPos <= sentEnd) {
                    found = true;
                    int startOff = startPos - sentStart;
                    int endOff = endPosP - sentStart;
                    Utterance utt = utterances.get(i);
                    mention.setSentenceStart(sentStart);
                    mention.setSentenceEnd(sentEnd);
                    mention.setParseInfo(parseTrees.get(i), startOff, endOff);
                    mention.setUtterance(utt);
                }
            }
            mention.createDiscourseEntity();
            mention.createSieveDiscourseEntity();
            // in perfect-boundaries mode, we only create markables that
            // we can find in the key
            if (!_perfectBoundaries || m_coref != null) {
                inTextMarkables.add(mention);
            }
            // NOTE(review): everything from here to the end of the outer loop
            // runs once per kept markable, not once per document — the
            // repeated sorting/renumbering looks like it was meant to sit
            // after the markable loop. Confirm against upstream before moving.
            //sort utterances
            Collections.sort(utterances);
            //sort CFs within utterances
            for (int i = 0; i < utterances.size(); i++) {
                Collections.sort(utterances.get(i).getCFs());
            }
            //Assign numbers to CFs
            for (int i = 0; i < utterances.size(); i++) {
                ArrayList<Mention> CFs = utterances.get(i).getCFs();
                for (int j = 0; j < CFs.size(); j++) {
                    CFs.get(j).setUttPos(j);
                    if (CFs.get(j).getIsFirstMention()) {
                        CFs.get(j).getDiscourseEntity().set_firstMention_isFirstMention(true);
                    }
                }
            }
        }
    }
    return inTextMarkables;
}
From source file:elkfed.mmax.importer.DetermineMinSpan.java
License:Apache License
/** adds min_ids and min_words attribute to all markables on the
 * coref level that do not currently have it;
 * uses the parse trees if available.
 * @param doc the Minidiscourse document
 */
public static void addMinSpanAttrs(MiniDiscourse doc) throws IOException {
    List<Markable> parses = DiscourseUtils.getMarkables(doc, DEFAULT_PARSE_LEVEL);
    List<Markable> coref_tags = DiscourseUtils.getMarkables(doc, DEFAULT_COREF_LEVEL);
    List<String> tokens = Arrays.asList(doc.getTokens());
    // Both lists are in discourse order, so a single cursor into `parses`
    // suffices: advance it until the current parse ends at or after the
    // coref markable's right boundary.
    int parses_idx = 0;
    for (Markable mk : coref_tags) {
        while (parses_idx < parses.size() && parses.get(parses_idx).getRightmostDiscoursePosition() < mk
                .getRightmostDiscoursePosition()) {
            parses_idx++;
        }
        // NOTE(review): if a coref markable lies beyond the last parse,
        // parses_idx == parses.size() here and this get() throws
        // IndexOutOfBoundsException — confirm whether input guarantees
        // full parse coverage.
        Markable parseMarkable = parses.get(parses_idx);
        // Rebuild the sentence's tree from its bracketed-string attribute.
        Tree parse = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE));
        addMinSpan(parseMarkable.getLeftmostDiscoursePosition(), parse, mk, tokens);
    }
}
From source file:elkfed.mmax.pipeline.P2Chunker.java
License:Apache License
/** Add parser, part of speech, and chunk markables */ protected void addMarkables() { final StringBuffer markableBuffer = new StringBuffer(); List<Markable> sentences = null; for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) { int start = parseMarkable.getLeftmostDiscoursePosition(); int end = parseMarkable.getRightmostDiscoursePosition(); /** Retrieve chunk tags from the parse tree and add chunk markables */ /* traverse parse-tree (real tree, not string), extract basic NPs and poss */ Tree pTree = null;/*from w ww . j a v a2s . c om*/ pTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)); normalizeTree(pTree); if (pTree == null) continue; //add all basic nps for (Iterator<Tree> treeIt = pTree.iterator(); treeIt.hasNext();) { Tree nod = treeIt.next(); if (nod.value().equals("NP" + NPSTATUS_SEPARATOR + "1") || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) { markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, false); } } List<Tree> Leaves = pTree.getLeaves(); // add NPs embedding possessives for (Tree l : Leaves) { if (l.value().toLowerCase().startsWith("'s")) { if (l.parent(pTree) != null && l.parent(pTree).value().equals("POS") && l.parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree).value().startsWith("NP") && l.parent(pTree).parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree) .parent(pTree).value().equals("NP" + NPSTATUS_SEPARATOR + "0")) { Tree nod = l.parent(pTree).parent(pTree).parent(pTree); markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, true); } } } } }
From source file:elkfed.mmax.pipeline.Parser.java
License:Apache License
/** Add parser, part of speech, and chunk markables */
protected void addMarkables() {
    final StringBuffer markableBuffer = new StringBuffer();
    List<Markable> sentences = null;
    try {
        sentences = DiscourseUtils.getSentences(currentDocument);
    } catch (Exception mmax2e) {
        // NOTE(review): if this throws, sentences stays null and the loop
        // below NPEs anyway — the catch only delays the failure.
        mmax2e.printStackTrace();
    }
    for (int sentence = 0; sentence < sentences.size(); sentence++) {
        /* Add the parse tree markables. */
        final Map<String, String> attributes = new HashMap<String, String>(levelAttributes);
        // NOTE(review): replaceAll("&", "&") is a no-op as written; this
        // almost certainly lost an entity escape (e.g. "&amp;") when the
        // code was copied — confirm against the original source.
        attributes.put(TAG_ATTRIBUTE, forest.get(sentence).replaceAll("&", "&"));
        markableBuffer.setLength(0);
        Markable sent_m = sentences.get(sentence);
        int start = sent_m.getLeftmostDiscoursePosition();
        int end = sent_m.getRightmostDiscoursePosition();
        currentLevel.addMarkable(start, end, attributes);
        /* Retrieve chunk tags from the parse tree and add chunk markables:
         * scan the bracketed string token by token, tracking NP nesting
         * depth and the word position of the current leaf. */
        boolean inNP = false;
        int startNP = -1;
        int wordLoc = 0;
        int depth = 0;
        for (String tok : forest.get(sentence).replaceAll("\\)", ") ").split("\\s+")) {
            if (tok.matches("\\(NP")) {
                inNP = true;
                startNP = wordLoc;
                depth = 0;
            }
            // A token ending in ')' closes one or more brackets...
            if ((inNP) && (tok.matches(".*\\)"))) {
                depth--;
            }
            // ...and one starting with '(' opens one.
            if ((inNP) && (tok.matches("\\(.*"))) {
                depth++;
            }
            // "word)" tokens are leaves: advance the word counter.
            if (tok.matches(".+\\)")) {
                wordLoc++;
            }
            // Depth back to zero: the NP just closed; emit a chunk markable.
            if ((depth == 0) && (inNP)) {
                inNP = false;
                final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes);
                markableBuffer.setLength(0);
                cAttributes.put(TAG_ATTRIBUTE, "np");
                //TODO: check if it's not start+wordLoc-1 ?
                chunkLevel.addMarkable(start + startNP, start + wordLoc - 1, cAttributes);
            }
        }
        /* Create a tree object from the current sentence. */
        // NOTE(review): this fresh node is discarded by the very next
        // assignment — the allocation is dead.
        Tree currentTree = new LabeledScoredTreeNode();
        // System.err.println("processing sentence: "+forest.get(sentence));
        currentTree = (LabeledScoredTreeNode) Tree.valueOf(forest.get(sentence));
        /* Retrieve POS tags from the parse tree. */
        List<Label> taggedSent = new ArrayList<Label>(currentTree.preTerminalYield());
        for (int i = 0; i < taggedSent.size(); i++) {
            posTags.add(taggedSent.get(i).value());
        }
    }
    /* Add POS tag markables, one per token position. */
    for (int pos = 0; pos < posTags.size(); pos++) {
        final HashMap<String, String> attributes = new HashMap<String, String>(posAttributes);
        attributes.put(TAG_ATTRIBUTE, posTags.get(pos).toLowerCase());
        posLevel.addMarkable(pos, pos, attributes);
    }
}
From source file:elkfed.mmax.pipeline.SemTagger.java
License:Apache License
/** Sets the list of semantic roles of a given document */ private void initDocument() { // reset the pool of semantic roles and markables of the corrent doc this.semroles.clear(); this.markables.clear(); this.parseTrees.clear(); this.parseStart.clear(); this.parseEnd.clear(); // and get the new ones MarkableLevel semrole_level = currentDocument.getMarkableLevelByName(DEFAULT_SEMROLE_LEVEL); MarkableQuery q = new MarkableQuery(semrole_level); q.addAttCondition("tag", "target", MarkableQuery.OP_NE); this.semroles = q.execute(semrole_level, MiniDiscourse.DISCOURSEORDERCMP); this.markables = currentLevel.getMarkables(MiniDiscourse.DISCOURSEORDERCMP); for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) { Tree currParseTree = null;// w w w .java2 s .c o m currParseTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)); normalizeTree(currParseTree); parseTrees.add(currParseTree); parseStart.add(parseMarkable.getLeftmostDiscoursePosition()); parseEnd.add(parseMarkable.getRightmostDiscoursePosition()); } }
From source file:opennlp.tools.parse_thicket.external_rst.ParseCorefBuilderWithNERandRST.java
License:Apache License
/**
 * Builds a parse thicket (per-sentence constituency trees plus
 * inter-sentence arcs from coreference, CA, and RST discourse relations)
 * for the given text using the `processors` Scala NLP pipeline.
 *
 * @param text the raw input text
 * @return the populated thicket, or null if annotation failed
 */
public ParseThicketWithDiscourseTree buildParseThicket(String text) {
    List<Tree> ptTrees = new ArrayList<Tree>();
    List<WordWordInterSentenceRelationArc> arcs = new ArrayList<WordWordInterSentenceRelationArc>();
    List<List<ParseTreeNode>> nodesThicket = new ArrayList<List<ParseTreeNode>>();
    Document doc = null;
    try {
        doc = proc.annotate(text, false);
    } catch (IllegalArgumentException iae) {
        log.severe("failed to parse text: " + text);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // failed to parse - skip this text
    if (doc == null)
        return null; // java.lang.IllegalArgumentException
    for (Sentence sentence : doc.sentences()) {
        // Build one ParseTreeNode per token (1-based ids), copying over NE
        // tags and lemmas when the pipeline produced them.
        List<ParseTreeNode> sentenceNodes = new ArrayList<ParseTreeNode>();
        String[] tokens = sentence.words();
        for (int i = 0; i < tokens.length; i++) {
            // sentence.startOffsets(), " "));
            // sentence.endOffsets(), " "));
            ParseTreeNode p = new ParseTreeNode(sentence.words()[i], sentence.tags().get()[i]);
            p.setId(i + 1);
            if (sentence.entities().isDefined()) {
                p.setNe(sentence.entities().get()[i]);
            }
            if (sentence.norms().isDefined()) {
                // p.setNormalizedWord(sentence.norms().get()[i]);
                p.setNormalizedWord(sentence.lemmas().get()[i]);
            }
            sentenceNodes.add(p);
        }
        if (sentence.dependencies().isDefined()) {
            // NOTE(review): the i-th dependency edge is attached to the i-th
            // token — this presumes edge iteration order lines up with token
            // order (one edge per token); confirm against the processors API.
            int i = 0;
            DirectedGraphEdgeIterator<String> iterator = new DirectedGraphEdgeIterator<String>(
                    sentence.dependencies().get());
            while (iterator.hasNext()) {
                scala.Tuple3<Object, Object, String> dep = iterator.next();
                // System.out.println(" head:" + dep._1() + " modifier:" +
                // dep._2() + " label:" + dep._3());
                if (i > sentenceNodes.size() - 1)
                    break;
                ParseTreeNode p = sentenceNodes.get(i);
                p.setHead(dep._1().toString());
                p.setModifier(dep._2().toString());
                p.setLabel(dep._3());
                sentenceNodes.set(i, p);
                i++;
            }
        }
        if (sentence.syntacticTree().isDefined()) {
            // Round-trip through the bracketed string to obtain a Stanford Tree.
            Tree tree = Tree.valueOf(sentence.syntacticTree().get().toString());
            ptTrees.add(tree);
            // tree.pennPrint();
        }
        nodesThicket.add(sentenceNodes);
    }
    if (doc.coreferenceChains().isDefined()) {
        // these are scala.collection Iterator and Iterable (not Java!)
        scala.collection.Iterator<scala.collection.Iterable<CorefMention>> chains = doc.coreferenceChains()
                .get().getChains().iterator();
        while (chains.hasNext()) {
            scala.collection.Iterator<CorefMention> chain = chains.next().iterator();
            // System.out.println("Found one coreference chain containing
            // the following mentions:");
            // NOTE(review): chains are capped at 4 mentions by the fixed-size
            // arrays below; mentions past the fourth are ignored.
            int numInChain = 0;
            int[] niSentence = new int[4], niWord = new int[4], startOffset = new int[4], endOffset = new int[4];
            while (chain.hasNext()) {
                CorefMention mention = chain.next();
                // note that all these offsets start at 0 too
                niSentence[numInChain] = mention.sentenceIndex();
                niWord[numInChain] = mention.headIndex();
                startOffset[numInChain] = mention.startOffset();
                endOffset[numInChain] = mention.endOffset();
                if (numInChain >= 4 - 1)
                    break;
                numInChain++;
                // " headIndex:" + mention.headIndex() +
                // " startTokenOffset:" + mention.startOffset() +
                // " endTokenOffset:" + mention.endOffset());
            }
            if (numInChain > 0) { // more than a single mention
                // Link each mention to the next one in the chain with a coref arc.
                for (int i = 0; i < numInChain; i++) {
                    ArcType arcType = new ArcType("coref-", "", 0, 0);
                    WordWordInterSentenceRelationArc arc = new WordWordInterSentenceRelationArc(
                            new Pair<Integer, Integer>(niSentence[i], niWord[i]),
                            new Pair<Integer, Integer>(niSentence[i + 1], niWord[i + 1]),
                            startOffset[i] + "", startOffset[i + 1] + "", arcType);
                    arcs.add(arc);
                }
            }
        }
    }
    List<WordWordInterSentenceRelationArc> arcsCA = buildCAarcs(nodesThicket);
    arcs.addAll(arcsCA);
    ParseThicketWithDiscourseTree result = new ParseThicketWithDiscourseTree(ptTrees, arcs);
    if (doc.discourseTree().isDefined()) {
        Option<DiscourseTree> discourseTree = doc.discourseTree();
        // scala.collection.immutable.List<DiscourseTree> scList =
        // discourseTree.toList();
        scala.collection.Iterator<DiscourseTree> iterator = discourseTree.iterator();
        while (iterator.hasNext()) {
            DiscourseTree dt = iterator.next();
            result.setDt(dt);
            List<WordWordInterSentenceRelationArc> rstArcs = new ArrayList<WordWordInterSentenceRelationArc>();
            navigateDiscourseTree(dt, rstArcs, nodesThicket);
            arcs.addAll(rstArcs);
            System.out.println(dt);
            System.out.println("first EDU = " + dt.firstEDU() + "| dt.firstSentence() = " + dt.firstSentence()
                    + " \n| last EDU = " + dt.lastEDU() + "| dt.lastSentence() = " + dt.lastSentence()
                    + " \n| dt.tokenCount() = " + dt.tokenCount() + "| dt.firstToken " + dt.firstToken()
                    + " | dt.lastToken() " + dt.lastToken() + "\n kind =" + dt.kind() + " | text = "
                    + dt.rawText());
            // NOTE(review): sb is never written to, so this prints a blank
            // line — looks like leftover debug scaffolding.
            StringBuilder sb = new StringBuilder(10000);
            System.out.println(sb);
        }
    }
    result.setOrigText(text);
    result.setNodesThicket(nodesThicket);
    result.setDtDump(); // sets the DT representation for TK learning
    return result;
}
From source file:org.ets.research.nlp.stanford_thrift.general.CoreNLPThriftUtil.java
License:Open Source License
/**
 * Reconstructs a CoreNLP Annotation from a list of bracketed parse-tree
 * strings: tokens are recovered from the leaves, one CoreMap sentence is
 * built per tree, and parse annotations are filled in from the re-parsed
 * tree.
 *
 * @param parseTrees one Penn-bracketed parse string per sentence
 * @return an Annotation covering all sentences, with adjusted character offsets
 */
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees) {
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<String> allTokens = new ArrayList<String>();
    int tokenOffset = 0;
    for (String tree : parseTrees) {
        // Recover the leaf tokens by splitting the bracketed string on ") "
        // and taking the text after the last "(" of each fragment.
        // NOTE(review): tagAndToken[1] assumes every leaf has the exact form
        // "(TAG token)" — a tag with no token (or extra spaces) would throw;
        // confirm the producer guarantees this shape.
        List<String> tokens = new ArrayList<String>();
        String[] firstSplit = tree.split("\\) ");
        for (String f : firstSplit) {
            String[] secondSplit = f.split("\\(");
            String[] tagAndToken = secondSplit[secondSplit.length - 1].trim().replaceAll("\\)+$", "")
                    .split(" ");
            tokens.add(tagAndToken[1]);
        }
        allTokens.addAll(tokens);
        String[] tokensArr = new String[tokens.size()];
        tokens.toArray(tokensArr);
        List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
        String originalText = Sentence.listToString(tokens);
        CoreMap sentence = new Annotation(originalText);
        // Character offsets are provisional (0 .. length of last token's
        // text) and get fixed up by adjustCharacterOffsets below.
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        sentence.set(CharacterOffsetEndAnnotation.class,
                sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        // Token offsets are document-global and cumulative across sentences.
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
        tokenOffset += sentenceTokens.size();
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, new EnglishGrammaticalStructureFactory(),
                sentence, Tree.valueOf(tree));
        sentences.add(sentence);
    }
    Annotation allSentences = new Annotation(Sentence.listToString(allTokens));
    allSentences.set(CoreAnnotations.SentencesAnnotation.class, adjustCharacterOffsets(sentences, true));
    return allSentences;
}
From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java
License:Open Source License
/** If one were to call any of these other methods to get a parse tree for some input sentence * with the -outputFormatOptions flag of "lexicalize", they would receive their parse tree, * in the -outputFormat of their choice, with every leaf marked with it's head word. * This function does exactly that on an existing parse tree. * NOTE that this WILL re-lexicalize a pre-lexicalized tree, so don't pass in a tree that * has been lexicalized and expect to get back the same thing as what you passed in. *//*from w w w . j a v a 2s. c o m*/ public String lexicalize_parse_tree(String tree) throws TApplicationException { try { Tree parseTree = Tree.valueOf(tree); Tree lexicalizedTree = Trees.lexicalize(parseTree, tlp.headFinder()); treePrinter = ParserUtil.setOptions(null, tlp); // use defaults Function<Tree, Tree> a = TreeFunctions.getLabeledToDescriptiveCoreLabelTreeFunction(); lexicalizedTree = a.apply(lexicalizedTree); return ParserUtil.TreeObjectToString(lexicalizedTree, treePrinter); } catch (Exception e) { // FIXME throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage()); } }