List of usage examples for the edu.stanford.nlp.trees.LabeledScoredTreeFactory no-argument constructor
public LabeledScoredTreeFactory()
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public boolean parse(List<? extends HasWord> sentence) { if (tf == null) { tf = new LabeledScoredTreeFactory(); }//from w w w . j ava 2 s .c o m lr = null; // better nullPointer exception than silent error //System.out.println("is it a taggedword?" + (sentence.get(0) instanceof TaggedWord)); //debugging if (sentence != this.sentence) { this.sentence = sentence; floodTags = false; } if (op.testOptions.verbose) { Timing.tick("Starting pcfg parse."); } if (spillGuts) { tick("Starting PCFG parse..."); } length = sentence.size(); if (length > arraySize) { considerCreatingArrays(length); } int goal = stateIndex.indexOf(goalStr); if (op.testOptions.verbose) { // System.out.println(numStates + " states, " + goal + " is the goal state."); // System.err.println(new ArrayList(ug.coreRules.keySet())); System.err.print("Initializing PCFG..."); } // map input words to words array (wordIndex ints) words = new int[length]; beginOffsets = new int[length]; endOffsets = new int[length]; originalCoreLabels = new CoreLabel[length]; originalTags = new HasTag[length]; int unk = 0; StringBuilder unkWords = new StringBuilder("["); // int unkIndex = wordIndex.size(); for (int i = 0; i < length; i++) { String s = sentence.get(i).word(); if (sentence.get(i) instanceof HasOffset) { HasOffset word = (HasOffset) sentence.get(i); beginOffsets[i] = word.beginPosition(); endOffsets[i] = word.endPosition(); } else { //Storing the positions of the word interstices //Account for single space between words beginOffsets[i] = ((i == 0) ? 
0 : endOffsets[i - 1] + 1); endOffsets[i] = beginOffsets[i] + s.length(); } if (sentence.get(i) instanceof CoreLabel) { originalCoreLabels[i] = (CoreLabel) sentence.get(i); } if (sentence.get(i) instanceof HasTag) { originalTags[i] = (HasTag) sentence.get(i); } if (op.testOptions.verbose && (!wordIndex.contains(s) || !lex.isKnown(wordIndex.indexOf(s)))) { unk++; unkWords.append(' '); unkWords.append(s); unkWords.append(" { "); for (int jj = 0; jj < s.length(); jj++) { char ch = s.charAt(jj); unkWords.append(Character.getType(ch)).append(" "); } unkWords.append("}"); } // TODO: really, add a new word? //words[i] = wordIndex.indexOf(s, unkIndex); //if (words[i] == unkIndex) { // ++unkIndex; //} //words[i] = wordIndex.indexOf(s, true); if (wordIndex.contains(s)) { words[i] = wordIndex.indexOf(s); } else { words[i] = wordIndex.indexOf(Lexicon.UNKNOWN_WORD); } } // initialize inside and outside score arrays if (spillGuts) { tick("Wiping arrays..."); } for (int start = 0; start < length; start++) { for (int end = start + 1; end <= length; end++) { Arrays.fill(iScore[start][end], Float.NEGATIVE_INFINITY); if (op.doDep && !op.testOptions.useFastFactored) { Arrays.fill(oScore[start][end], Float.NEGATIVE_INFINITY); } if (op.testOptions.lengthNormalization) { Arrays.fill(wordsInSpan[start][end], 1); } } } for (int loc = 0; loc <= length; loc++) { Arrays.fill(narrowLExtent[loc], -1); // the rightmost left with state s ending at i that we can get is the beginning Arrays.fill(wideLExtent[loc], length + 1); // the leftmost left with state s ending at i that we can get is the end } for (int loc = 0; loc < length; loc++) { Arrays.fill(narrowRExtent[loc], length + 1); // the leftmost right with state s starting at i that we can get is the end Arrays.fill(wideRExtent[loc], -1); // the rightmost right with state s starting at i that we can get is the beginning } // int puncTag = stateIndex.indexOf("."); // boolean lastIsPunc = false; if (op.testOptions.verbose) { Timing.tick("done."); 
unkWords.append(" ]"); op.tlpParams.pw(System.err).println("Unknown words: " + unk + " " + unkWords); System.err.print("Starting filters..."); } // do tags if (spillGuts) { tick("Tagging..."); } initializeChart(sentence); //if (op.testOptions.outsideFilter) // buildOFilter(); if (op.testOptions.verbose) { Timing.tick("done."); System.err.print("Starting insides..."); } // do the inside probabilities doInsideScores(); if (op.testOptions.verbose) { // insideTime += Timing.tick("done."); Timing.tick("done."); System.out.println( "PCFG parsing " + length + " words (incl. stop): insideScore = " + iScore[0][length][goal]); } bestScore = iScore[0][length][goal]; boolean succeeded = hasParse(); if (op.testOptions.doRecovery && !succeeded && !floodTags) { floodTags = true; // sentence will try to reparse // ms: disabled message. this is annoying and it doesn't really provide much information //System.err.println("Trying recovery parse..."); return parse(sentence); } if (!op.doDep || op.testOptions.useFastFactored) { return succeeded; } if (op.testOptions.verbose) { System.err.print("Starting outsides..."); } // outside scores oScore[0][length][goal] = 0.0f; doOutsideScores(); //System.out.println("State rate: "+((int)(1000*ohits/otries))/10.0); //System.out.println("Traversals: "+ohits); if (op.testOptions.verbose) { // outsideTime += Timing.tick("Done."); Timing.tick("done."); } if (op.doDep) { initializePossibles(); } return succeeded; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public ConditionalCFGParser(BinaryGrammar bg, UnaryGrammar ug, ConditionalLexicon lex, Options op, Index<String> stateIndex, Index<String> wordIndex, Index<String> tagIndex) { // System.out.println("ExhaustivePCFGParser constructor called."); this.bg = bg; this.ug = ug; this.lex = lex; this.op = op; this.tlp = op.langpack(); goalStr = tlp.startSymbol();/* w w w. ja va 2s.c om*/ this.stateIndex = stateIndex; this.wordIndex = wordIndex; this.tagIndex = tagIndex; tf = new LabeledScoredTreeFactory(); numStates = stateIndex.size(); isTag = new boolean[numStates]; // tag index is smaller, so we fill by iterating over the tag index // rather than over the state index for (String tag : tagIndex.objectsList()) { int state = stateIndex.indexOf(tag); if (state < 0) { continue; } isTag[state] = true; } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/** * Reads in a Penn Treebank-style String and returns a tree. * // www . j a v a 2s . c o m * @param pennString * A Penn Treebank-style String as produced by the StandfordParser * @return a tree representation of the PennString (LabeledScoredTree) */ public static Tree pennString2Tree(String pennString) { TreeReader tr = null; try { tr = new PennTreeReader(new StringReader(pennString), new LabeledScoredTreeFactory()); return tr.readTree(); } catch (IOException e) { throw new IllegalStateException(e); } finally { closeQuietly(tr); } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
private AnalysisUtilities() { parser = null;/*from w w w. j ava 2 s . c o m*/ conjugator = new VerbConjugator(); conjugator.load(GlobalProperties.getProperties().getProperty("verbConjugationsFile", "config" + File.separator + "verbConjugations.txt")); headfinder = new CollinsHeadFinder(); tree_factory = new LabeledScoredTreeFactory(); tlp = new PennTreebankLanguagePack(); }
From source file:edu.cmu.ark.nlp.question.QuestionUtil.java
License:Open Source License
public static LabeledScoredTreeFactory getTreeFactory() { if (tree_factory == null) { tree_factory = new LabeledScoredTreeFactory(); }/* w w w . jav a 2s.c om*/ return tree_factory; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
public SentenceSimplifier(Properties props) { factory = new LabeledScoredTreeFactory(); this.hf = new CollinsHeadFinder(); String computefeatures = props.getProperty("getComputeFeatures", "true"); if (computefeatures.equals("true")) this.getComputeFeatures = true; else/*from www.j av a2 s .c o m*/ this.getComputeFeatures = false; this.props = props; conjugator = new VerbConjugator(props); }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
public SentenceSimplifier() { factory = new LabeledScoredTreeFactory(); }
From source file:edu.jhu.agiga.StanfordAgigaSentence.java
License:Open Source License
public Tree getStanfordContituencyTree() { TreeFactory tf = new LabeledScoredTreeFactory(); StringReader r = new StringReader(getParseText()); TreeReader tr = new PennTreeReader(r, tf); try {//from www . ja v a2 s.c o m return tr.readTree(); } catch (IOException e) { throw new RuntimeException("Error: IOException should not be thrown by StringReader"); } }
From source file:elkfed.expletives.TrainingData.java
License:Apache License
public static void extractExamples(String file, Set<String> anaphoricPronouns, List<ExpletiveInstance> instances) throws FileNotFoundException, IOException { TreeReader tr = new PennTreeReader(new FileReader(file), new LabeledScoredTreeFactory(), new BobChrisTreeNormalizer()); Tree t;//from ww w. ja va 2 s . c o m String file_id = file.substring(file.length() - 8, file.length() - 4); int sent_idx = 1; while ((t = tr.readTree()) != null) { //t.pennPrint(); int word_idx = 1; for (Tree t1 : t.getLeaves()) { String s = t1.toString(); if ("it".equals(s) || "It".equals(s)) { String id = String.format("%s:S%d:%d-%d", file_id, sent_idx, word_idx, word_idx); ExpletiveInstance inst = new ExpletiveInstance(t, t1, id); boolean is_positive = anaphoricPronouns.contains(id); inst.setFeature(PairInstance.FD_POSITIVE, !is_positive); instances.add(inst); String cls = is_positive ? "+1" : "-1"; System.out.format("%s\t%s\t(%s)\n", s, id, cls); } word_idx++; } //System.out.println(); //System.out.println(t); sent_idx++; } }
From source file:elkfed.mmax.importer.ImportOntonotes.java
License:Apache License
public MiniDiscourse importFile(String fname) { try {//from www .j ava 2s. c o m boolean had_space = true; boolean need_bugfix = System.getProperty("elkfed.BuggyOntonotes", "no").matches("y|yes|true"); List<Tag> names_stack = new ArrayList<Tag>(); Alphabet<String> sets = new Alphabet<String>(); sets.lookupIndex("*DUMMY*"); int sent_id = 0; Tag sentence_tag = null; OntonotesReader reader = new OntonotesReader(new File(fname + ".coref")); OntonotesReader readerNE = new OntonotesReader(new File(fname + ".name")); TreeReader tr = new PennTreeReader(new FileReader(fname + ".parse"), new LabeledScoredTreeFactory(), new BobChrisTreeNormalizer()); Tree tree = null; int eventType = reader.getNextEvent(); boolean in_text = false; do { if (eventType == OntonotesReader.START_TAG && "COREF".equals(reader.getName())) { Tag t; if (need_bugfix) { t = buggy_push_tag("coref", tag_stack); } else { t = push_tag("coref"); } if ("IDENT".equals(reader.getAttribute("TYPE"))) { t.attrs.put("coref_set", "set_" + sets.lookupIndex(reader.getAttribute("ID"))); } had_space = true; } else if (eventType == OntonotesReader.END_TAG && "COREF".equals(reader.getName())) { Tag t = pop_tag("coref"); DetermineMinSpan.addMinSpan(sentence_tag.start, tree, t, tokens); had_space = true; } else if (in_text && eventType == OntonotesReader.TOKEN) { if (!reader.isTrace()) { // process up to the next token in the names part int names_event = readerNE.getNextEvent(); while (names_event != OntonotesReader.TOKEN) { if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = push_tag("enamex", names_stack); t.attrs.put("tag", readerNE.getAttribute("TYPE")); } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = pop_tag("enamex", names_stack); } else { throw new IllegalStateException("Unexpected event:" + names_event); } names_event = readerNE.getNextEvent(); } assert (reader.getToken().equals(readerNE.getToken())); String tok = 
reader.getToken(); if (tok.equals("-LRB-")) tok = "("; if (tok.equals("-RRB-")) tok = ")"; if (tok.equals("-LSB-")) tok = "["; if (tok.equals("-RSB-")) tok = "]"; if (tok.equals("-LCB-")) tok = "{"; if (tok.equals("-RCB-")) tok = "}"; add_token(tok); } } else if (in_text && eventType == OntonotesReader.NEWLINE) { //System.out.println("sentence break"); if (sentence_tag != null) { sentence_tag.end = tokens.size() - 1; if (sentence_tag.end >= sentence_tag.start) { tags.add(sentence_tag); if (tree != null) { Tag parse_tag = new Tag(); parse_tag.tag = "parse"; parse_tag.start = sentence_tag.start; parse_tag.end = sentence_tag.end; parse_tag.attrs.put("tag", tree.toString()); tags.add(parse_tag); assert sentence_tag.end - sentence_tag.start + 1 == tree.yield().size() : String .format("%s / %s", tokens.subList(sentence_tag.start, sentence_tag.end + 1), tree.yield()); addParseInfo(sentence_tag.start, tree); } } } // process up to end of sentence in names annotation int names_event = readerNE.getNextEvent(); while (names_event != OntonotesReader.NEWLINE) { if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = push_tag("enamex", names_stack); t.attrs.put("tag", readerNE.getAttribute("TYPE")); } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = pop_tag("enamex", names_stack); } else if (names_event == OntonotesReader.END_TAG && "DOC".equals(readerNE.getName())) { // ignore } else { throw new IllegalStateException( "Unexpected event:" + readerNE.describeEvent(names_event)); } names_event = readerNE.getNextEvent(); } // prepare new parse and sentence sentence_tag = new Tag(); sentence_tag.start = tokens.size(); sentence_tag.tag = "sentence"; sentence_tag.attrs.put("orderid", "" + sent_id++); tree = tr.readTree(); } else if (eventType == OntonotesReader.END_TAG && "DOCNO".equals(reader.getName())) { in_text = true; // go to the end of the DOCNO part in name doc int names_event = 
readerNE.getNextEvent(); while (names_event != OntonotesReader.END_TAG || !"DOCNO".equals(reader.getName())) { names_event = readerNE.getNextEvent(); } } else if (eventType == OntonotesReader.START_TAG && "TURN".equals(reader.getName())) { int names_event = readerNE.getNextEvent(); if (names_event != OntonotesReader.START_TAG || !"TURN".equals(readerNE.getName())) { throw new UnsupportedOperationException("TURN in coref but not in names"); } // parse level seems to be inconsistent... so don't check here :-| System.err.println("TURN parse:" + tree.toString()); tree = tr.readTree(); eventType = reader.getNextEvent(); names_event = readerNE.getNextEvent(); if (eventType != OntonotesReader.NEWLINE || names_event != OntonotesReader.NEWLINE) { throw new UnsupportedOperationException("No Newline after TURN"); } } eventType = reader.getNextEvent(); } while (eventType != OntonotesReader.END_DOCUMENT); return create(); } catch (IOException ex) { throw new RuntimeException("Cannot read file", ex); } }