Example usage for edu.stanford.nlp.trees.Tree.pennString()

Introduction

On this page you can find example usages of the pennString() method of edu.stanford.nlp.trees.Tree.

Prototype

public String pennString()

Source Link

Document

Calls pennPrint() and saves the output to a String.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

/**
 * Adds one {@code PennTree} annotation per sentence of the given document to the CAS.
 *
 * <p>For every sentence the parse tree is copied with plain string labels, rendered
 * with {@code pennString()}, and stored in a {@code PennTree} that spans the
 * sentence's begin/end character offsets.
 *
 * @param aJCas
 *            the CAS in which the {@code PennTree} annotations are created and indexed.
 * @param aDocument
 *            the annotated document whose sentences are read.
 */
public static void convertPennTree(JCas aJCas, Annotation aDocument) {
    for (CoreMap sentence : aDocument.get(SentencesAnnotation.class)) {
        Tree parse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        int sentenceBegin = sentence.get(CharacterOffsetBeginAnnotation.class);
        int sentenceEnd = sentence.get(CharacterOffsetEndAnnotation.class);

        // Copy the tree with simple string labels; the Penn string is taken from the copy.
        Tree simpleLabelled = parse.deepCopy(parse.treeFactory(), StringLabel.factory());

        // Store the Penn Treebank-style string in the CAS.
        PennTree annotation = new PennTree(aJCas, sentenceBegin, sentenceEnd);
        annotation.setPennTree(simpleLabelled.pennString());
        annotation.addToIndexes();
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Creates annotation with Penn Treebank style representations of the syntax tree
 * /*www. j a v a  2  s.co m*/
 * @param aBegin
 *            start offset.
 * @param aEnd
 *            end offset.
 */
public void createPennTreeAnnotation(int aBegin, int aEnd) {
    Tree t = tokenTree.getTree();

    // write Penn Treebank-style string to cas
    PennTree pTree = new PennTree(jCas, aBegin, aEnd);

    // create tree with simple labels and get penn string from it
    t = t.deepCopy(t.treeFactory(), StringLabel.factory());

    pTree.setPennTree(t.pennString());
    pTree.addToIndexes();
}

From source file:edu.albany.cubism.util.StanfordChineseParser.java

/**
 * Prints the given tree, its head terminal, and one line per leaf.
 *
 * <p>The tree and its head terminal (located with a {@code CollinsHeadFinder}) are
 * printed through {@code tp}. Each leaf is then written to standard output as its
 * {@code pennString()} followed by a space and its {@code value()}.
 *
 * @param t the tree to print
 */
public void printTree(Tree t) {
    tp.printTree(t);
    tp.printTree(t.headTerminal(new CollinsHeadFinder()));

    // The former loop over t.subTreeList() only cast each element into a local that was
    // never read, so it has been removed together with the raw List it required.
    for (Tree leaf : t.getLeaves()) {
        System.out.println(leaf.pennString() + " " + leaf.value());
    }
}

From source file:elkfed.coref.discourse_entities.DiscourseEntity.java

License:Open Source License

private Tree massimoHeadFindHack(Tree npNode) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    /*/*from  w ww.  ja  v  a2  s  . co  m*/
     * NOTE (yv):
     * We should really have a decent configurable head finder.
     * The "generic" head finder below probably works, but ...
     * this is ugly enough for English, but making it work for
     * English *and* Italian (and possibly other languages)
     * is only something for very enthusiastic people with
     * slight masochistic tendencies.
     */
    //CollinsHeadFinder hf = new CollinsHeadFinder();
    //ModCollinsHeadFinder hf = new ModCollinsHeadFinder();

    /* -- trivial -- */
    if (npNode.numChildren() == 0)
        return npNode;
    if (npNode.numChildren() == 1) {
        if (npNode.firstChild().numChildren() == 0)
            return npNode;
        return massimoHeadFindHack(npNode.firstChild());
    }
    /* -- coordination -- */
    if (npNode.numChildren() > 2) {
        for (Tree n : npNode.children()) {
            if (lang_plugin.labelCat(n.nodeString()) == NodeCategory.CC)
                return null;
        }
    }

    /* -- last child is a noun (common/proper) --*/
    /* NB: will it work for italian though? */

    NodeCategory firstpos = lang_plugin.labelCat(npNode.firstChild().nodeString());
    NodeCategory nextpos = lang_plugin.labelCat(npNode.getChild(1).nodeString());
    NodeCategory lastpos = lang_plugin.labelCat(npNode.lastChild().nodeString());

    if (lastpos == NodeCategory.CN)
        return npNode.lastChild();
    if (lastpos == NodeCategory.PN)
        return npNode.lastChild();

    /* -- (NP (NP (DT the) (NN man)) (PP (in from) (NP (NNP UNCLE)))) -- */

    if (firstpos == NodeCategory.NP && nextpos != NodeCategory.CN)
        return massimoHeadFindHack(npNode.firstChild());

    /* -- misc -- */

    Tree found_head = null;
    int state = 0;
    for (Tree n : npNode.children()) {
        NodeCategory ncat = lang_plugin.labelCat(n.nodeString());
        if (ncat == NodeCategory.CN || ncat == NodeCategory.PN || ncat == NodeCategory.PRO) {
            state = 4;
            found_head = n;
        } else if (ncat == NodeCategory.NP && state < 3) {
            state = 3;
            found_head = n;
        } else if (ncat == NodeCategory.ADJ && state < 3) {
            state = 2;
            found_head = n;
        }
    }
    if (found_head != null) {
        if (state == 3) {
            return massimoHeadFindHack(found_head);
        }
        return found_head;
    }

    //    if (ConfigProperties.getInstance().getDbgPrint()) 
    System.err.println("Couldn't find a head for NP:" + npNode.pennString());
    return null;
}

From source file:jnetention.nlp.demo.NLPAnalyzer.java

/**
 * Parses the given text and shows a textual report of the result in {@code output}.
 *
 * <p>The report is assembled from the parse's dependencies, named entities, words,
 * Penn strings of its trees, verb phrases and noun phrases. It is then displayed in a
 * {@code TextArea} that is installed as the center of {@code output} via
 * {@code Platform.runLater}.
 *
 * @param text the text handed to {@code client.parse}
 * @throws Exception declared by the original signature; presumably propagated from
 *         {@code client.parse} (not visible here)
 */
protected void update(String text) throws Exception {

    TextParse p = client.parse(text);

    StringBuilder s = new StringBuilder();
    s.append(p.getDependencies(true).toString());
    s.append('\n');
    s.append(p.getNamedEntities());
    s.append('\n');
    s.append(p.getWords());
    s.append('\n');
    // NOTE(review): the named entities are appended a second time here; kept so the
    // report text is unchanged, but this looks unintended (a commented-out coref
    // cluster append used to sit next to the first occurrence) - confirm.
    s.append(p.getNamedEntities());
    s.append('\n');
    // The collected List is appended as a whole, i.e. in its toString() form.
    s.append(p.getTrees().stream().map((Tree t) -> t.pennString() + '\n')
            .collect(Collectors.toList()));
    s.append(p.getVerbPhrases());
    s.append(p.getNounPhrases());

    final String report = s.toString();

    Platform.runLater(() -> {
        TextArea ta = new TextArea(report);
        ta.setMaxHeight(Double.MAX_VALUE);
        ta.setMaxWidth(Double.MAX_VALUE);

        // The text area itself is what gets displayed; the ScrollPane that used to be
        // wrapped around it was never added to the layout and has been removed.
        output.setCenter(ta);
    });
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Renders a tree as a Penn string with annotation clutter stripped out.
 *
 * <p>Three regex replacements are applied, in order, to {@code parseTree.pennString()}:
 * the literal prefix {@code [TextAnnotation=} is deleted, any run starting at a
 * NamedEntityTag/Value/Index/PartOfSpeech {@code ...Annotation} up to the next
 * {@code )} is collapsed to {@code )}, and any remaining {@code [...]} group is deleted.
 *
 * @param parseTree the tree to format
 * @return the cleaned-up Penn string
 */
public static String formatPennTree(Tree parseTree) {
    return parseTree.pennString()
            .replaceAll("\\[TextAnnotation=", "")
            .replaceAll("(NamedEntityTag|Value|Index|PartOfSpeech)Annotation.+?\\)", ")")
            .replaceAll("\\[.+?\\]", "");
}

From source file:org.aksw.simba.bengal.triple2nl.property.PropertyVerbalizer.java

License:Apache License

/**
 * Derives the verbalization type of a property from a linguistic analysis of its text.
 *
 * <p>The text is run through {@code pipeline}. For every sentence a space-separated
 * pattern of coarse tags is appended to one shared pattern string: {@code NP} for POS
 * tags starting with "N", {@code VP} for those starting with "V", otherwise the POS tag
 * itself; the first token may instead contribute its upper-cased lemma. If the parse
 * shows a verb phrase whose first non-preterminal child is a noun phrase not starting
 * with a determiner, {@code DET} is inserted after the first pattern element. The type
 * is {@code VERB} when the first token of the last sentence is an auxiliary verb or the
 * final pattern matches {@code VERB_PATTERN}; otherwise it stays {@code UNSPECIFIED}.
 *
 * @param propertyURI the URI of the property, passed through to the result
 * @param propertyText the human-readable property text that is analysed
 * @return the verbalization carrying the URI, the text, the computed pattern and the type
 */
private PropertyVerbalization getTypeByLinguisticAnalysis(String propertyURI, String propertyText) {
    logger.debug("...using linguistical analysis...");
    Annotation document = new Annotation(propertyText);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // Alternation of the clause-level tags; it does not depend on the sentence, so it is
    // built once instead of once per loop iteration.
    String clauseTagRegex = Joiner.on('|').join(Lists.newArrayList(S, SBAR, SBARQ, SINV, FRAGMENT));

    String pattern = "";
    PropertyVerbalizationType verbalizationType = PropertyVerbalizationType.UNSPECIFIED;
    boolean firstTokenAuxiliary = false;
    for (CoreMap sentence : sentences) {
        List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
        // get the first word and check if it's 'is' or 'has'
        CoreLabel token = tokens.get(0);
        String word = token.get(TextAnnotation.class);
        String pos = token.get(PartOfSpeechAnnotation.class);
        String lemma = token.getString(LemmaAnnotation.class);

        firstTokenAuxiliary = auxiliaryVerbs.contains(lemma);

        // NOTE(review): "be" is compared against the lemma but "have" against the surface
        // word, so an inflected form such as "has" does not take this branch - confirm
        // whether lemma.equals("have") was intended.
        if (lemma.equals("be") || word.equals("have")) {
            pattern += lemma.toUpperCase();
        } else {
            pattern += posToPatternTag(pos);
        }
        if (tokens.size() > 1) {
            pattern += " ";
            for (int i = 1; i < tokens.size(); i++) {
                token = tokens.get(i);
                pos = token.get(PartOfSpeechAnnotation.class);
                pattern += posToPatternTag(pos);
                pattern += " ";
            }
        }
        // get the parse tree
        Tree tree = sentence.get(TreeAnnotation.class);
        // skip ROOT tag
        tree = tree.skipRoot();
        logger.debug("Parse tree:" + tree.pennString());
        // check if VP is directly followed by NP
        // sometimes parent node is S,SINV,etc.
        if (tree.value().matches(clauseTagRegex)) {
            tree = tree.getChild(0);
        }
        boolean useDeterminer = false;
        if (tree.value().equals(VERB_PHRASE.getTag())) {
            for (Tree child : tree.getChildrenAsList()) {
                // check if first non terminal is NP and not contains a
                // determiner
                if (!child.isPreTerminal()) {
                    if (child.value().equals(NOUN_PHRASE.getTag())
                            && !child.getChild(0).value().equals(DETERMINER.getTag())) {
                        useDeterminer = true;
                    }
                    // only the first non-preterminal child is inspected
                    break;
                }
            }
        }
        // add determiner tag
        if (useDeterminer) {
            String[] split = pattern.split(" ");
            pattern = split[0] + " DET " + Joiner.on(" ").join(Arrays.copyOfRange(split, 1, split.length));
        }
    }
    pattern = pattern.trim();

    // if first token is an auxiliary can return verb
    if (firstTokenAuxiliary) {
        verbalizationType = PropertyVerbalizationType.VERB;
    }

    // check if pattern matches
    if (pattern.matches(VERB_PATTERN)) {
        logger.debug("...successfully determined type.");
        verbalizationType = PropertyVerbalizationType.VERB;
    }
    return new PropertyVerbalization(propertyURI, propertyText, pattern, verbalizationType);
}

/** Maps a POS tag to its pattern element: "NP" for N*, "VP" for V*, else the tag itself. */
private static String posToPatternTag(String pos) {
    if (pos.startsWith("N")) {
        return "NP";
    }
    if (pos.startsWith("V")) {
        return "VP";
    }
    return pos;
}

From source file:org.lambda3.text.simplification.discourse.model.serializer.TreeSerializer.java

License:Open Source License

/**
 * Writes the tree as a JSON string containing its Penn representation on a single line.
 *
 * <p>The Penn string is trimmed and every run of whitespace is collapsed to one space.
 *
 * @param value the tree to serialize
 * @param gen the generator the string is written to
 * @param provider unused by this implementation
 * @throws IOException if writing to the generator fails
 */
@Override
public void serialize(Tree value, JsonGenerator gen, SerializerProvider provider) throws IOException {
    // "\\s+" already covers newlines and tabs, so the former follow-up
    // replaceAll("[\\n\\t]", "") could never match and has been dropped.
    gen.writeString(value.pennString().trim().replaceAll("\\s+", " "));
}

From source file:pltag.parser.json.JsonResult.java

License:Open Source License

/**
 * Re-renders a bracketed tree string through {@code Tree.valueOf} and {@code pennString()}.
 *
 * @param input the textual tree representation handed to {@code Tree.valueOf}
 * @return the Penn string of the tree read from {@code input}
 */
private String toPtbFormat(String input) {
    return Tree.valueOf(input).pennString();
}

From source file:qmul.align.SentenceSyntacticSimilarityMeasure.java

License:Open Source License

/**
 * Demo driver: parses two fixed sentences, prints their Penn strings and three tree-kernel
 * scores, then prints the similarity of a pair of dialogue sentences while their
 * transcriptions are changed step by step.
 *
 * @param args unused
 */
public static void main(String[] args) {
    SentenceSyntacticSimilarityMeasure measure = new SentenceSyntacticSimilarityMeasure();
    PennTreebankTokenizer tokenizer = new PennTreebankTokenizer(true);
    StanfordParser parser = new StanfordParser();

    String firstText = "You're full of catarrh.";
    String secondText = "Lot of wax in it, right enough.";
    parser.parse(tokenizer.getWordsFromString(firstText));
    Tree firstTree = parser.getBestParse();
    parser.parse(tokenizer.getWordsFromString(secondText));
    Tree secondTree = parser.getBestParse();

    System.out.println(firstText);
    System.out.println(firstTree.pennString());
    System.out.println(secondText);
    System.out.println(secondTree.pennString());
    System.out.println(TreeKernel.resetAndCompute(firstTree, secondTree, TreeKernel.SYN_TREES));
    System.out.println(TreeKernel.resetAndCompute(firstTree, secondTree, TreeKernel.SUB_TREES));
    System.out.println(TreeKernel.resetAndCompute(firstTree, secondTree, TreeKernel.SUBSET_TREES));

    DialogueTurn turn = new DialogueTurn("t", 1, null, null);
    DialogueSentence left = new DialogueSentence("a", 1, turn, "ok");
    DialogueSentence right = new DialogueSentence("b", 1, turn, "ok");
    left.setSyntax(parser.parse(left.getTranscription()));
    right.setSyntax(parser.parse(right.getTranscription()));
    System.out.println(describePairSimilarity(left, right, measure));

    left.setTranscription("ok ok ok ok");
    left.setSyntax(parser.parse(left.getTranscription()));
    System.out.println(describePairSimilarity(left, right, measure));

    left.setTranscription("that's really not ok");
    left.setSyntax(parser.parse(left.getTranscription()));
    System.out.println(describePairSimilarity(left, right, measure));

    right.setTranscription("that's really not ok");
    right.setSyntax(parser.parse(right.getTranscription()));
    System.out.println(describePairSimilarity(left, right, measure));

    left.setTranscription("john likes the small bear");
    left.setSyntax(parser.parse(left.getTranscription()));
    right.setTranscription("jim hates the big rabbit");
    right.setSyntax(parser.parse(right.getTranscription()));
    // The raw counts are read after the similarity call, exactly as in the inline form.
    System.out.println(describePairSimilarity(left, right, measure) + "\n" + measure.rawCountsA() + "\n"
            + measure.rawCountsB() + "\n" + measure.rawCountsAB());

    left.setTranscription("the man likes the small bear");
    left.setSyntax(parser.parse(left.getTranscription()));
    System.out.println(describePairSimilarity(left, right, measure) + "\n" + measure.rawCountsA() + "\n"
            + measure.rawCountsB() + "\n" + measure.rawCountsAB());
}

/** Builds the three-line report: both sentences with their syntax, then "sim = " and the score. */
private static String describePairSimilarity(DialogueSentence a, DialogueSentence b,
        SentenceSyntacticSimilarityMeasure sm) {
    return "" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = "
            + sm.similarity(a, b);
}