List of usage examples for edu.stanford.nlp.trees Tree valueOf
public static Tree valueOf(String str)
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
private List<String> decompose(String documentText) { List<Tree> trees = new ArrayList<Tree>(); for (String sentence : AnalysisUtilities.getSentences(documentText)) { trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse); }//from w w w .ja va 2s .co m List<String> result = new ArrayList<String>(); for (Tree t : trees) { TregexPattern p = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) "); TregexMatcher m = p.matcher(t); while (m.find()) { Tree np = m.getNode("np"); Tree vp = m.getNode("vp"); Tree np2 = np.deepCopy(); TregexPattern p2 = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)"); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); ps.add(Tsurgeon.parseOperation("prune sbarq")); ps.add(Tsurgeon.parseOperation("prune c1")); ps.add(Tsurgeon.parseOperation("prune c2")); Tsurgeon.processPattern(p2, Tsurgeon.collectOperations(ps), np2); np = np2; Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))"); result.add(AnalysisUtilities.orginialSentence(newTree.yield())); } } return result; }
From source file:edu.nus.comp.nlp.tool.anaphoraresolution.AnnotatedText.java
License:Open Source License
/**
 * Builds the annotated-text model: one parse-tree node per sentence attached
 * under a shared root, plus the NP / pronoun lists derived from that tree.
 */
private AnnotatedText(List<String> sentences) {
    rootNode = new DefaultMutableTreeNode();
    int sentenceIndex = 0;
    for (String sentence : sentences) {
        // Parse the bracketed sentence string and wrap it as a mutable tree node.
        TreeAdapter adapter = new TreeAdapter(Tree.valueOf(sentence), sentenceIndex);
        rootNode.add(adapter.getDefaultMutableTreeNode());
        sentenceIndex++;
    }
    // rootNode = buildParseTree(sents);
    NPExtractor extractor = new NPExtractor(rootNode);
    NPList = extractor.getNPList();
    PRPList = extractor.getPRPList();
    identifyPleonasticPronoun(rootNode);
    SNPList = buildSNPList(NPList);
}
From source file:elkfed.coref.mentions.AbstractMentionFactory.java
License:Apache License
/**
 * Extracts all mentions of a document: reads the per-sentence parse trees,
 * wraps them as Utterances, then turns every markable that passes
 * keepMarkable into a Mention, locating the sentence/parse it belongs to.
 *
 * @param doc the MMAX document to extract mentions from
 * @return the mentions in text order (in perfect-boundaries mode, only those
 *         that also exist in the coref key)
 */
public List<Mention> extractMentions(MiniDiscourse doc) throws IOException {
    _currentText = CorefDocuments.getInstance().getText(doc);
    ArrayList<Tree> parseTrees = new ArrayList<Tree>();
    ArrayList<Integer> parseStart = new ArrayList<Integer>();
    ArrayList<Integer> parseEnd = new ArrayList<Integer>();
    ArrayList<Utterance> utterances = new ArrayList<Utterance>();
    // extract parses and create utterances for each sentence
    for (Markable parseMarkable : DiscourseUtils.getMarkables(doc, DEFAULT_PARSE_LEVEL)) {
        // Parse trees are stored as bracketed strings in a markable attribute.
        Tree currParseTree = postProcess(
                Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)));
        Utterance currUtt = new Utterance(currParseTree);
        parseTrees.add(currParseTree);
        parseStart.add(parseMarkable.getLeftmostDiscoursePosition());
        parseEnd.add(parseMarkable.getRightmostDiscoursePosition());
        currUtt.setLeftBoundary(parseMarkable.getLeftmostDiscoursePosition());
        currUtt.setRightBoundary(parseMarkable.getRightmostDiscoursePosition());
        utterances.add(currUtt);
    }
    ArrayList<Mention> inTextMarkables = new ArrayList<Mention>();
    for (Markable m_markable : DiscourseUtils.getMarkables(doc, DEFAULT_MARKABLE_LEVEL)) {
        if (keepMarkable(m_markable)) {
            // Non-null iff this markable is part of a coreference chain in the key.
            Markable m_coref = CorefDocuments.getInstance().markableIsaCorefElement(doc, m_markable);
            Mention mention = new Mention(m_markable, doc);
            if (m_coref != null) {
                mention.setSetID(m_coref.getAttributeValue(COREF_SET_ATTRIBUTE));
                reportMapping(m_markable, m_coref);
            } else {
                reportMapping(m_markable, null);
            }
            // find the parse tree that this markable is in
            int startPos = m_markable.getLeftmostDiscoursePosition();
            int endPos = m_markable.getRightmostDiscoursePosition();
            int endPosP = m_markable.getRightmostDiscoursePosition();
            mention.setStartWord(startPos);
            mention.setEndWord(endPos);
            // If a minimal span is annotated, narrow start/end to it (endPosP
            // keeps the full right boundary for the parse offset below).
            if (m_markable.getAttributeValue("min_ids") != null) {
                String[] spans = MarkableHelper.parseRanges(m_markable.getAttributeValue("min_ids"));
                startPos = doc.DiscoursePositionFromDiscourseElementID(spans[0]);
                endPos = doc.DiscoursePositionFromDiscourseElementID(spans[spans.length - 1]);
            }
            // NOTE(review): boxed Boolean where primitive boolean would do.
            Boolean found = false;
            for (int i = 0; i < parseTrees.size() && !found; i++) {
                final int sentStart = parseStart.get(i);
                final int sentEnd = parseEnd.get(i);
                /* if (startPos >= sentStart && endPos <= sentEnd) {
                 * gold/carafe markables may disrespect sentence boundaries :((
                 * they should still receive at least some sentence information though */
                if (startPos >= sentStart && startPos <= sentEnd) {
                    found = true;
                    int startOff = startPos - sentStart;
                    int endOff = endPosP - sentStart;
                    Utterance utt = utterances.get(i);
                    mention.setSentenceStart(sentStart);
                    mention.setSentenceEnd(sentEnd);
                    mention.setParseInfo(parseTrees.get(i), startOff, endOff);
                    mention.setUtterance(utt);
                }
            }
            mention.createDiscourseEntity();
            mention.createSieveDiscourseEntity();
            // in perfect-boundaries mode, we only create markables that
            // we can find in the key
            if (!_perfectBoundaries || m_coref != null) {
                inTextMarkables.add(mention);
            }
            // NOTE(review): everything from here to the end of the outer loop
            // runs once per kept markable, not once per document — the
            // repeated sorting/renumbering looks like it was meant to sit
            // after the markable loop. Confirm against upstream before moving.
            //sort utterances
            Collections.sort(utterances);
            //sort CFs within utterances
            for (int i = 0; i < utterances.size(); i++) {
                Collections.sort(utterances.get(i).getCFs());
            }
            //Assign numbers to CFs
            for (int i = 0; i < utterances.size(); i++) {
                ArrayList<Mention> CFs = utterances.get(i).getCFs();
                for (int j = 0; j < CFs.size(); j++) {
                    CFs.get(j).setUttPos(j);
                    if (CFs.get(j).getIsFirstMention()) {
                        CFs.get(j).getDiscourseEntity().set_firstMention_isFirstMention(true);
                    }
                }
            }
        }
    }
    return inTextMarkables;
}
From source file:elkfed.mmax.importer.DetermineMinSpan.java
License:Apache License
/** adds min_ids and min_words attribute to all markables on the
 * coref level that do not currently have it;
 * uses the parse trees if available.
 * @param doc the Minidiscourse document
 */
public static void addMinSpanAttrs(MiniDiscourse doc) throws IOException {
    List<Markable> parses = DiscourseUtils.getMarkables(doc, DEFAULT_PARSE_LEVEL);
    List<Markable> coref_tags = DiscourseUtils.getMarkables(doc, DEFAULT_COREF_LEVEL);
    List<String> tokens = Arrays.asList(doc.getTokens());
    // Both lists are in discourse order, so a single cursor into `parses`
    // suffices: advance it until the current parse ends at or after the
    // coref markable's right boundary.
    int parses_idx = 0;
    for (Markable mk : coref_tags) {
        while (parses_idx < parses.size() && parses.get(parses_idx).getRightmostDiscoursePosition() < mk
                .getRightmostDiscoursePosition()) {
            parses_idx++;
        }
        // NOTE(review): if a coref markable lies beyond the last parse,
        // parses_idx == parses.size() here and this get() throws
        // IndexOutOfBoundsException — confirm whether input guarantees
        // full parse coverage.
        Markable parseMarkable = parses.get(parses_idx);
        // Rebuild the sentence's tree from its bracketed-string attribute.
        Tree parse = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE));
        addMinSpan(parseMarkable.getLeftmostDiscoursePosition(), parse, mk, tokens);
    }
}
From source file:elkfed.mmax.pipeline.P2Chunker.java
License:Apache License
/** Add parser, part of speech, and chunk markables */ protected void addMarkables() { final StringBuffer markableBuffer = new StringBuffer(); List<Markable> sentences = null; for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) { int start = parseMarkable.getLeftmostDiscoursePosition(); int end = parseMarkable.getRightmostDiscoursePosition(); /** Retrieve chunk tags from the parse tree and add chunk markables */ /* traverse parse-tree (real tree, not string), extract basic NPs and poss */ Tree pTree = null;/*from w ww . j a v a2s . c om*/ pTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)); normalizeTree(pTree); if (pTree == null) continue; //add all basic nps for (Iterator<Tree> treeIt = pTree.iterator(); treeIt.hasNext();) { Tree nod = treeIt.next(); if (nod.value().equals("NP" + NPSTATUS_SEPARATOR + "1") || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) { markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, false); } } List<Tree> Leaves = pTree.getLeaves(); // add NPs embedding possessives for (Tree l : Leaves) { if (l.value().toLowerCase().startsWith("'s")) { if (l.parent(pTree) != null && l.parent(pTree).value().equals("POS") && l.parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree).value().startsWith("NP") && l.parent(pTree).parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree) .parent(pTree).value().equals("NP" + NPSTATUS_SEPARATOR + "0")) { Tree nod = l.parent(pTree).parent(pTree).parent(pTree); markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, true); } } } } }
From source file:elkfed.mmax.pipeline.Parser.java
License:Apache License
/** Add parser, part of speech, and chunk markables */
protected void addMarkables() {
    final StringBuffer markableBuffer = new StringBuffer();
    List<Markable> sentences = null;
    try {
        sentences = DiscourseUtils.getSentences(currentDocument);
    } catch (Exception mmax2e) {
        // NOTE(review): if this throws, sentences stays null and the loop
        // below NPEs anyway — the catch only delays the failure.
        mmax2e.printStackTrace();
    }
    for (int sentence = 0; sentence < sentences.size(); sentence++) {
        /* Add the parse tree markables. */
        final Map<String, String> attributes = new HashMap<String, String>(levelAttributes);
        // NOTE(review): replaceAll("&", "&") is a no-op as written; this
        // almost certainly lost an entity escape (e.g. "&amp;") when the
        // code was copied — confirm against the original source.
        attributes.put(TAG_ATTRIBUTE, forest.get(sentence).replaceAll("&", "&"));
        markableBuffer.setLength(0);
        Markable sent_m = sentences.get(sentence);
        int start = sent_m.getLeftmostDiscoursePosition();
        int end = sent_m.getRightmostDiscoursePosition();
        currentLevel.addMarkable(start, end, attributes);
        /* Retrieve chunk tags from the parse tree and add chunk markables:
         * scan the bracketed string token by token, tracking NP nesting
         * depth and the word position of the current leaf. */
        boolean inNP = false;
        int startNP = -1;
        int wordLoc = 0;
        int depth = 0;
        for (String tok : forest.get(sentence).replaceAll("\\)", ") ").split("\\s+")) {
            if (tok.matches("\\(NP")) {
                inNP = true;
                startNP = wordLoc;
                depth = 0;
            }
            // A token ending in ')' closes one or more brackets...
            if ((inNP) && (tok.matches(".*\\)"))) {
                depth--;
            }
            // ...and one starting with '(' opens one.
            if ((inNP) && (tok.matches("\\(.*"))) {
                depth++;
            }
            // "word)" tokens are leaves: advance the word counter.
            if (tok.matches(".+\\)")) {
                wordLoc++;
            }
            // Depth back to zero: the NP just closed; emit a chunk markable.
            if ((depth == 0) && (inNP)) {
                inNP = false;
                final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes);
                markableBuffer.setLength(0);
                cAttributes.put(TAG_ATTRIBUTE, "np");
                //TODO: check if it's not start+wordLoc-1 ?
                chunkLevel.addMarkable(start + startNP, start + wordLoc - 1, cAttributes);
            }
        }
        /* Create a tree object from the current sentence. */
        // NOTE(review): this fresh node is discarded by the very next
        // assignment — the allocation is dead.
        Tree currentTree = new LabeledScoredTreeNode();
        // System.err.println("processing sentence: "+forest.get(sentence));
        currentTree = (LabeledScoredTreeNode) Tree.valueOf(forest.get(sentence));
        /* Retrieve POS tags from the parse tree. */
        List<Label> taggedSent = new ArrayList<Label>(currentTree.preTerminalYield());
        for (int i = 0; i < taggedSent.size(); i++) {
            posTags.add(taggedSent.get(i).value());
        }
    }
    /* Add POS tag markables, one per token position. */
    for (int pos = 0; pos < posTags.size(); pos++) {
        final HashMap<String, String> attributes = new HashMap<String, String>(posAttributes);
        attributes.put(TAG_ATTRIBUTE, posTags.get(pos).toLowerCase());
        posLevel.addMarkable(pos, pos, attributes);
    }
}
From source file:elkfed.mmax.pipeline.SemTagger.java
License:Apache License
/** Sets the list of semantic roles of a given document */ private void initDocument() { // reset the pool of semantic roles and markables of the corrent doc this.semroles.clear(); this.markables.clear(); this.parseTrees.clear(); this.parseStart.clear(); this.parseEnd.clear(); // and get the new ones MarkableLevel semrole_level = currentDocument.getMarkableLevelByName(DEFAULT_SEMROLE_LEVEL); MarkableQuery q = new MarkableQuery(semrole_level); q.addAttCondition("tag", "target", MarkableQuery.OP_NE); this.semroles = q.execute(semrole_level, MiniDiscourse.DISCOURSEORDERCMP); this.markables = currentLevel.getMarkables(MiniDiscourse.DISCOURSEORDERCMP); for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) { Tree currParseTree = null;// w w w .java2 s .c o m currParseTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)); normalizeTree(currParseTree); parseTrees.add(currParseTree); parseStart.add(parseMarkable.getLeftmostDiscoursePosition()); parseEnd.add(parseMarkable.getRightmostDiscoursePosition()); } }
From source file:opennlp.tools.parse_thicket.external_rst.ParseCorefBuilderWithNERandRST.java
License:Apache License
/**
 * Builds a parse thicket (per-sentence constituency trees plus
 * inter-sentence arcs from coreference, CA, and RST discourse relations)
 * for the given text using the `processors` Scala NLP pipeline.
 *
 * @param text the raw input text
 * @return the populated thicket, or null if annotation failed
 */
public ParseThicketWithDiscourseTree buildParseThicket(String text) {
    List<Tree> ptTrees = new ArrayList<Tree>();
    List<WordWordInterSentenceRelationArc> arcs = new ArrayList<WordWordInterSentenceRelationArc>();
    List<List<ParseTreeNode>> nodesThicket = new ArrayList<List<ParseTreeNode>>();
    Document doc = null;
    try {
        doc = proc.annotate(text, false);
    } catch (IllegalArgumentException iae) {
        log.severe("failed to parse text: " + text);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // failed to parse - skip this text
    if (doc == null)
        return null; // java.lang.IllegalArgumentException
    for (Sentence sentence : doc.sentences()) {
        // Build one ParseTreeNode per token (1-based ids), copying over NE
        // tags and lemmas when the pipeline produced them.
        List<ParseTreeNode> sentenceNodes = new ArrayList<ParseTreeNode>();
        String[] tokens = sentence.words();
        for (int i = 0; i < tokens.length; i++) {
            // sentence.startOffsets(), " "));
            // sentence.endOffsets(), " "));
            ParseTreeNode p = new ParseTreeNode(sentence.words()[i], sentence.tags().get()[i]);
            p.setId(i + 1);
            if (sentence.entities().isDefined()) {
                p.setNe(sentence.entities().get()[i]);
            }
            if (sentence.norms().isDefined()) {
                // p.setNormalizedWord(sentence.norms().get()[i]);
                p.setNormalizedWord(sentence.lemmas().get()[i]);
            }
            sentenceNodes.add(p);
        }
        if (sentence.dependencies().isDefined()) {
            // NOTE(review): the i-th dependency edge is attached to the i-th
            // token — this presumes edge iteration order lines up with token
            // order (one edge per token); confirm against the processors API.
            int i = 0;
            DirectedGraphEdgeIterator<String> iterator = new DirectedGraphEdgeIterator<String>(
                    sentence.dependencies().get());
            while (iterator.hasNext()) {
                scala.Tuple3<Object, Object, String> dep = iterator.next();
                // System.out.println(" head:" + dep._1() + " modifier:" +
                // dep._2() + " label:" + dep._3());
                if (i > sentenceNodes.size() - 1)
                    break;
                ParseTreeNode p = sentenceNodes.get(i);
                p.setHead(dep._1().toString());
                p.setModifier(dep._2().toString());
                p.setLabel(dep._3());
                sentenceNodes.set(i, p);
                i++;
            }
        }
        if (sentence.syntacticTree().isDefined()) {
            // Round-trip through the bracketed string to obtain a Stanford Tree.
            Tree tree = Tree.valueOf(sentence.syntacticTree().get().toString());
            ptTrees.add(tree);
            // tree.pennPrint();
        }
        nodesThicket.add(sentenceNodes);
    }
    if (doc.coreferenceChains().isDefined()) {
        // these are scala.collection Iterator and Iterable (not Java!)
        scala.collection.Iterator<scala.collection.Iterable<CorefMention>> chains = doc.coreferenceChains()
                .get().getChains().iterator();
        while (chains.hasNext()) {
            scala.collection.Iterator<CorefMention> chain = chains.next().iterator();
            // System.out.println("Found one coreference chain containing
            // the following mentions:");
            // NOTE(review): chains are capped at 4 mentions by the fixed-size
            // arrays below; mentions past the fourth are ignored.
            int numInChain = 0;
            int[] niSentence = new int[4], niWord = new int[4], startOffset = new int[4], endOffset = new int[4];
            while (chain.hasNext()) {
                CorefMention mention = chain.next();
                // note that all these offsets start at 0 too
                niSentence[numInChain] = mention.sentenceIndex();
                niWord[numInChain] = mention.headIndex();
                startOffset[numInChain] = mention.startOffset();
                endOffset[numInChain] = mention.endOffset();
                if (numInChain >= 4 - 1)
                    break;
                numInChain++;
                // " headIndex:" + mention.headIndex() +
                // " startTokenOffset:" + mention.startOffset() +
                // " endTokenOffset:" + mention.endOffset());
            }
            if (numInChain > 0) { // more than a single mention
                // Link each mention to the next one in the chain with a coref arc.
                for (int i = 0; i < numInChain; i++) {
                    ArcType arcType = new ArcType("coref-", "", 0, 0);
                    WordWordInterSentenceRelationArc arc = new WordWordInterSentenceRelationArc(
                            new Pair<Integer, Integer>(niSentence[i], niWord[i]),
                            new Pair<Integer, Integer>(niSentence[i + 1], niWord[i + 1]),
                            startOffset[i] + "", startOffset[i + 1] + "", arcType);
                    arcs.add(arc);
                }
            }
        }
    }
    List<WordWordInterSentenceRelationArc> arcsCA = buildCAarcs(nodesThicket);
    arcs.addAll(arcsCA);
    ParseThicketWithDiscourseTree result = new ParseThicketWithDiscourseTree(ptTrees, arcs);
    if (doc.discourseTree().isDefined()) {
        Option<DiscourseTree> discourseTree = doc.discourseTree();
        // scala.collection.immutable.List<DiscourseTree> scList =
        // discourseTree.toList();
        scala.collection.Iterator<DiscourseTree> iterator = discourseTree.iterator();
        while (iterator.hasNext()) {
            DiscourseTree dt = iterator.next();
            result.setDt(dt);
            List<WordWordInterSentenceRelationArc> rstArcs = new ArrayList<WordWordInterSentenceRelationArc>();
            navigateDiscourseTree(dt, rstArcs, nodesThicket);
            arcs.addAll(rstArcs);
            System.out.println(dt);
            System.out.println("first EDU = " + dt.firstEDU() + "| dt.firstSentence() = " + dt.firstSentence()
                    + " \n| last EDU = " + dt.lastEDU() + "| dt.lastSentence() = " + dt.lastSentence()
                    + " \n| dt.tokenCount() = " + dt.tokenCount() + "| dt.firstToken " + dt.firstToken()
                    + " | dt.lastToken() " + dt.lastToken() + "\n kind =" + dt.kind() + " | text = "
                    + dt.rawText());
            // NOTE(review): sb is never written to, so this prints a blank
            // line — looks like leftover debug scaffolding.
            StringBuilder sb = new StringBuilder(10000);
            System.out.println(sb);
        }
    }
    result.setOrigText(text);
    result.setNodesThicket(nodesThicket);
    result.setDtDump(); // sets the DT representation for TK learning
    return result;
}
From source file:org.ets.research.nlp.stanford_thrift.general.CoreNLPThriftUtil.java
License:Open Source License
/**
 * Reconstructs a CoreNLP Annotation from a list of bracketed parse-tree
 * strings: tokens are recovered from the leaves, one CoreMap sentence is
 * built per tree, and parse annotations are filled in from the re-parsed
 * tree.
 *
 * @param parseTrees one Penn-bracketed parse string per sentence
 * @return an Annotation covering all sentences, with adjusted character offsets
 */
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees) {
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<String> allTokens = new ArrayList<String>();
    int tokenOffset = 0;
    for (String tree : parseTrees) {
        // Recover the leaf tokens by splitting the bracketed string on ") "
        // and taking the text after the last "(" of each fragment.
        // NOTE(review): tagAndToken[1] assumes every leaf has the exact form
        // "(TAG token)" — a tag with no token (or extra spaces) would throw;
        // confirm the producer guarantees this shape.
        List<String> tokens = new ArrayList<String>();
        String[] firstSplit = tree.split("\\) ");
        for (String f : firstSplit) {
            String[] secondSplit = f.split("\\(");
            String[] tagAndToken = secondSplit[secondSplit.length - 1].trim().replaceAll("\\)+$", "")
                    .split(" ");
            tokens.add(tagAndToken[1]);
        }
        allTokens.addAll(tokens);
        String[] tokensArr = new String[tokens.size()];
        tokens.toArray(tokensArr);
        List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
        String originalText = Sentence.listToString(tokens);
        CoreMap sentence = new Annotation(originalText);
        // Character offsets are provisional (0 .. length of last token's
        // text) and get fixed up by adjustCharacterOffsets below.
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        sentence.set(CharacterOffsetEndAnnotation.class,
                sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        // Token offsets are document-global and cumulative across sentences.
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
        tokenOffset += sentenceTokens.size();
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, new EnglishGrammaticalStructureFactory(),
                sentence, Tree.valueOf(tree));
        sentences.add(sentence);
    }
    Annotation allSentences = new Annotation(Sentence.listToString(allTokens));
    allSentences.set(CoreAnnotations.SentencesAnnotation.class, adjustCharacterOffsets(sentences, true));
    return allSentences;
}
From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java
License:Open Source License
/** If one were to call any of these other methods to get a parse tree for some input sentence * with the -outputFormatOptions flag of "lexicalize", they would receive their parse tree, * in the -outputFormat of their choice, with every leaf marked with it's head word. * This function does exactly that on an existing parse tree. * NOTE that this WILL re-lexicalize a pre-lexicalized tree, so don't pass in a tree that * has been lexicalized and expect to get back the same thing as what you passed in. *//*from w w w . j a v a 2s. c o m*/ public String lexicalize_parse_tree(String tree) throws TApplicationException { try { Tree parseTree = Tree.valueOf(tree); Tree lexicalizedTree = Trees.lexicalize(parseTree, tlp.headFinder()); treePrinter = ParserUtil.setOptions(null, tlp); // use defaults Function<Tree, Tree> a = TreeFunctions.getLabeledToDescriptiveCoreLabelTreeFunction(); lexicalizedTree = a.apply(lexicalizedTree); return ParserUtil.TreeObjectToString(lexicalizedTree, treePrinter); } catch (Exception e) { // FIXME throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage()); } }