Example usage for edu.stanford.nlp.trees LabeledScoredTreeFactory LabeledScoredTreeFactory

List of usage examples for edu.stanford.nlp.trees LabeledScoredTreeFactory LabeledScoredTreeFactory

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees LabeledScoredTreeFactory LabeledScoredTreeFactory.

Prototype

public LabeledScoredTreeFactory() 

Source Link

Document

Make a TreeFactory that produces LabeledScoredTree trees.

Usage

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Parses the sentence with the PCFG, computing inside scores (and outside
 * scores when dependency parsing is enabled and fast-factored parsing is not).
 * If no parse is found and testOptions.doRecovery is set, retries the same
 * sentence once with tag flooding enabled.
 *
 * @param sentence the input words; items may additionally implement
 *                 HasOffset/HasTag or be CoreLabels, in which case that
 *                 information is recorded for later use
 * @return true if a complete parse reaching the goal state was found
 */
public boolean parse(List<? extends HasWord> sentence) {
    // Lazily create the tree factory used when trees are later extracted.
    if (tf == null) {
        tf = new LabeledScoredTreeFactory();
    }
    lr = null; // better nullPointer exception than silent error
    //System.out.println("is it a taggedword?" + (sentence.get(0) instanceof TaggedWord)); //debugging
    // A genuinely new sentence resets the tag-flooding recovery flag.
    if (sentence != this.sentence) {
        this.sentence = sentence;
        floodTags = false;
    }
    if (op.testOptions.verbose) {
        Timing.tick("Starting pcfg parse.");
    }
    if (spillGuts) {
        tick("Starting PCFG parse...");
    }
    length = sentence.size();
    // Grow the chart arrays if this sentence is longer than any seen so far.
    if (length > arraySize) {
        considerCreatingArrays(length);
    }
    int goal = stateIndex.indexOf(goalStr);
    if (op.testOptions.verbose) {
        // System.out.println(numStates + " states, " + goal + " is the goal state.");
        // System.err.println(new ArrayList(ug.coreRules.keySet()));
        System.err.print("Initializing PCFG...");
    }
    // map input words to words array (wordIndex ints)
    words = new int[length];
    beginOffsets = new int[length];
    endOffsets = new int[length];
    originalCoreLabels = new CoreLabel[length];
    originalTags = new HasTag[length];
    int unk = 0;
    StringBuilder unkWords = new StringBuilder("[");
    // int unkIndex = wordIndex.size();
    for (int i = 0; i < length; i++) {
        String s = sentence.get(i).word();

        if (sentence.get(i) instanceof HasOffset) {
            HasOffset word = (HasOffset) sentence.get(i);
            beginOffsets[i] = word.beginPosition();
            endOffsets[i] = word.endPosition();
        } else {
            //Storing the positions of the word interstices
            //Account for single space between words
            beginOffsets[i] = ((i == 0) ? 0 : endOffsets[i - 1] + 1);
            endOffsets[i] = beginOffsets[i] + s.length();
        }

        // Keep the original labels/tags so they remain available later.
        if (sentence.get(i) instanceof CoreLabel) {
            originalCoreLabels[i] = (CoreLabel) sentence.get(i);
        }
        if (sentence.get(i) instanceof HasTag) {
            originalTags[i] = (HasTag) sentence.get(i);
        }

        // In verbose mode, collect unknown words (with the Unicode character
        // type of each character) purely for the diagnostic printed below.
        if (op.testOptions.verbose && (!wordIndex.contains(s) || !lex.isKnown(wordIndex.indexOf(s)))) {
            unk++;
            unkWords.append(' ');
            unkWords.append(s);
            unkWords.append(" { ");
            for (int jj = 0; jj < s.length(); jj++) {
                char ch = s.charAt(jj);
                unkWords.append(Character.getType(ch)).append(" ");
            }
            unkWords.append("}");
        }
        // TODO: really, add a new word?
        //words[i] = wordIndex.indexOf(s, unkIndex);
        //if (words[i] == unkIndex) {
        //  ++unkIndex;
        //}
        //words[i] = wordIndex.indexOf(s, true);
        // Out-of-vocabulary words map to the shared UNKNOWN_WORD index.
        if (wordIndex.contains(s)) {
            words[i] = wordIndex.indexOf(s);
        } else {
            words[i] = wordIndex.indexOf(Lexicon.UNKNOWN_WORD);
        }
    }

    // initialize inside and outside score arrays
    if (spillGuts) {
        tick("Wiping arrays...");
    }
    for (int start = 0; start < length; start++) {
        for (int end = start + 1; end <= length; end++) {
            Arrays.fill(iScore[start][end], Float.NEGATIVE_INFINITY);
            if (op.doDep && !op.testOptions.useFastFactored) {
                Arrays.fill(oScore[start][end], Float.NEGATIVE_INFINITY);
            }
            if (op.testOptions.lengthNormalization) {
                Arrays.fill(wordsInSpan[start][end], 1);
            }
        }
    }
    // Reset the narrow/wide extent bookkeeping to "nothing seen yet" values.
    for (int loc = 0; loc <= length; loc++) {
        Arrays.fill(narrowLExtent[loc], -1); // the rightmost left with state s ending at i that we can get is the beginning
        Arrays.fill(wideLExtent[loc], length + 1); // the leftmost left with state s ending at i that we can get is the end
    }
    for (int loc = 0; loc < length; loc++) {
        Arrays.fill(narrowRExtent[loc], length + 1); // the leftmost right with state s starting at i that we can get is the end
        Arrays.fill(wideRExtent[loc], -1); // the rightmost right with state s starting at i that we can get is the beginning
    }
    // int puncTag = stateIndex.indexOf(".");
    // boolean lastIsPunc = false;
    if (op.testOptions.verbose) {
        Timing.tick("done.");
        unkWords.append(" ]");
        op.tlpParams.pw(System.err).println("Unknown words: " + unk + " " + unkWords);
        System.err.print("Starting filters...");
    }
    // do tags
    if (spillGuts) {
        tick("Tagging...");
    }
    initializeChart(sentence);
    //if (op.testOptions.outsideFilter)
    // buildOFilter();
    if (op.testOptions.verbose) {
        Timing.tick("done.");
        System.err.print("Starting insides...");
    }
    // do the inside probabilities
    doInsideScores();
    if (op.testOptions.verbose) {
        // insideTime += Timing.tick("done.");
        Timing.tick("done.");
        System.out.println(
                "PCFG parsing " + length + " words (incl. stop): insideScore = " + iScore[0][length][goal]);
    }
    bestScore = iScore[0][length][goal];
    boolean succeeded = hasParse();
    // One-shot recovery: re-parse the same sentence with tag flooding on.
    // floodTags guards against infinite recursion.
    if (op.testOptions.doRecovery && !succeeded && !floodTags) {
        floodTags = true; // sentence will try to reparse
        // ms: disabled message. this is annoying and it doesn't really provide much information
        //System.err.println("Trying recovery parse...");
        return parse(sentence);
    }
    // Without dependency parsing (or with fast-factored parsing) the outside
    // pass is not needed.
    if (!op.doDep || op.testOptions.useFastFactored) {
        return succeeded;
    }
    if (op.testOptions.verbose) {
        System.err.print("Starting outsides...");
    }
    // outside scores
    oScore[0][length][goal] = 0.0f;
    doOutsideScores();
    //System.out.println("State rate: "+((int)(1000*ohits/otries))/10.0);
    //System.out.println("Traversals: "+ohits);
    if (op.testOptions.verbose) {
        // outsideTime += Timing.tick("Done.");
        Timing.tick("done.");
    }

    if (op.doDep) {
        initializePossibles();
    }

    return succeeded;
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Builds a parser over the given grammars, lexicon and indices.
 *
 * @param bg         binary grammar rules
 * @param ug         unary grammar rules
 * @param lex        conditional lexicon used for scoring
 * @param op         parser options; also supplies the language pack
 * @param stateIndex index of grammar states
 * @param wordIndex  index of words
 * @param tagIndex   index of POS tags
 */
public ConditionalCFGParser(BinaryGrammar bg, UnaryGrammar ug, ConditionalLexicon lex, Options op,
        Index<String> stateIndex, Index<String> wordIndex, Index<String> tagIndex) {
    //    System.out.println("ExhaustivePCFGParser constructor called.");
    this.bg = bg;
    this.ug = ug;
    this.lex = lex;
    this.op = op;
    this.tlp = op.langpack();
    goalStr = tlp.startSymbol();
    this.stateIndex = stateIndex;
    this.wordIndex = wordIndex;
    this.tagIndex = tagIndex;
    tf = new LabeledScoredTreeFactory();

    numStates = stateIndex.size();
    isTag = new boolean[numStates];
    // tag index is smaller, so we fill by iterating over the tag index
    // rather than over the state index
    for (String tag : tagIndex.objectsList()) {
        int state = stateIndex.indexOf(tag);
        if (state < 0) {
            // tag has no corresponding state — skip it
            continue;
        }
        isTag[state] = true;
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * Reads a Penn Treebank-style bracketed string and returns the corresponding tree.
 *
 * @param pennString
 *            a Penn Treebank-style String as produced by the StanfordParser
 * @return a tree representation of the penn string (a LabeledScoredTree)
 */
public static Tree pennString2Tree(String pennString) {
    TreeReader reader = null;
    try {
        reader = new PennTreeReader(new StringReader(pennString), new LabeledScoredTreeFactory());
        return reader.readTree();
    } catch (IOException e) {
        // Reading from an in-memory string should never fail; surface as unchecked.
        throw new IllegalStateException(e);
    } finally {
        closeQuietly(reader);
    }
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Private constructor: loads the verb conjugator from the configured file and
 * sets up the head finder, tree factory and language pack. The parser field is
 * left unset here (presumably created elsewhere on demand — confirm).
 */
private AnalysisUtilities() {
    parser = null;

    conjugator = new VerbConjugator();
    // Conjugation data file defaults to config/verbConjugations.txt.
    conjugator.load(GlobalProperties.getProperties().getProperty("verbConjugationsFile",
            "config" + File.separator + "verbConjugations.txt"));
    headfinder = new CollinsHeadFinder();
    tree_factory = new LabeledScoredTreeFactory();
    tlp = new PennTreebankLanguagePack();
}

From source file:edu.cmu.ark.nlp.question.QuestionUtil.java

License:Open Source License

/**
 * Returns the shared tree factory, creating it on first use.
 *
 * @return the lazily initialized {@code LabeledScoredTreeFactory}
 */
public static LabeledScoredTreeFactory getTreeFactory() {
    LabeledScoredTreeFactory factory = tree_factory;
    if (factory == null) {
        factory = new LabeledScoredTreeFactory();
        tree_factory = factory;
    }
    return factory;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Creates a simplifier configured from the given properties.
 *
 * @param props configuration; the "getComputeFeatures" property (default
 *              "true") enables feature computation only when it is exactly
 *              the string "true"
 */
public SentenceSimplifier(Properties props) {
    factory = new LabeledScoredTreeFactory();
    this.hf = new CollinsHeadFinder();
    // Any value other than the literal "true" disables feature computation.
    this.getComputeFeatures = "true".equals(props.getProperty("getComputeFeatures", "true"));
    this.props = props;
    conjugator = new VerbConjugator(props);
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Creates a simplifier with a fresh tree factory; all other collaborators
 * keep their field defaults.
 */
public SentenceSimplifier() {
    factory = new LabeledScoredTreeFactory();
}

From source file:edu.jhu.agiga.StanfordAgigaSentence.java

License:Open Source License

/**
 * Parses this sentence's stored parse text (Penn Treebank bracketing) into a
 * Stanford constituency {@code Tree}.
 *
 * @return the parsed tree, or null if the parse text contains no tree
 * @throws RuntimeException if an IOException is unexpectedly thrown while
 *         reading from the in-memory string
 */
public Tree getStanfordContituencyTree() {
    TreeFactory tf = new LabeledScoredTreeFactory();
    StringReader r = new StringReader(getParseText());
    TreeReader tr = new PennTreeReader(r, tf);
    try {
        return tr.readTree();
    } catch (IOException e) {
        // Fix: keep the cause instead of discarding it.
        throw new RuntimeException("Error: IOException should not be thrown by StringReader", e);
    } finally {
        // Fix: the reader was previously never closed.
        try {
            tr.close();
        } catch (IOException ignored) {
            // closing a StringReader-backed reader cannot meaningfully fail
        }
    }
}

From source file:elkfed.expletives.TrainingData.java

License:Apache License

/**
 * Scans a Penn Treebank-style parse file for occurrences of the pronoun
 * "it"/"It", creates one {@code ExpletiveInstance} per occurrence, labels it
 * against the gold set of anaphoric pronoun ids, appends it to
 * {@code instances}, and prints a tab-separated summary line per instance.
 *
 * @param file              path to the parse file; characters [len-8, len-4)
 *                          of the path are used as the file id
 * @param anaphoricPronouns gold ids (file:Sn:start-end) of anaphoric pronouns
 * @param instances         output list receiving the created instances
 * @throws FileNotFoundException if {@code file} does not exist
 * @throws IOException           if reading the trees fails
 */
public static void extractExamples(String file, Set<String> anaphoricPronouns,
        List<ExpletiveInstance> instances) throws FileNotFoundException, IOException {
    TreeReader tr = new PennTreeReader(new FileReader(file), new LabeledScoredTreeFactory(),
            new BobChrisTreeNormalizer());
    try {
        Tree t;
        String file_id = file.substring(file.length() - 8, file.length() - 4);
        int sent_idx = 1;
        while ((t = tr.readTree()) != null) {
            //t.pennPrint();
            int word_idx = 1;
            for (Tree t1 : t.getLeaves()) {
                String s = t1.toString();
                if ("it".equals(s) || "It".equals(s)) {
                    String id = String.format("%s:S%d:%d-%d", file_id, sent_idx, word_idx, word_idx);
                    ExpletiveInstance inst = new ExpletiveInstance(t, t1, id);
                    boolean is_positive = anaphoricPronouns.contains(id);
                    // NOTE(review): the feature stores the NEGATION of is_positive;
                    // this may be intentional (FD_POSITIVE marking expletive, i.e.
                    // non-anaphoric, uses) — confirm before changing.
                    inst.setFeature(PairInstance.FD_POSITIVE, !is_positive);
                    instances.add(inst);
                    String cls = is_positive ? "+1" : "-1";
                    System.out.format("%s\t%s\t(%s)\n", s, id, cls);
                }
                word_idx++;
            }
            //System.out.println();
            //System.out.println(t);
            sent_idx++;
        }
    } finally {
        // Fix: the reader (and its underlying FileReader) was never closed.
        tr.close();
    }
}

From source file:elkfed.mmax.importer.ImportOntonotes.java

License:Apache License

/**
 * Imports one OntoNotes document given the basename {@code fname}: reads the
 * .coref, .name and .parse files in lockstep and builds a MiniDiscourse
 * carrying token, sentence, parse, coref and enamex annotations.
 *
 * @param fname path prefix of the document (without the .coref/.name/.parse
 *              extensions)
 * @return the assembled discourse, via create()
 * @throws RuntimeException wrapping any IOException from the input files
 */
public MiniDiscourse importFile(String fname) {
    try {
        boolean had_space = true;
        // Workaround behavior toggled via -Delkfed.BuggyOntonotes=y|yes|true.
        boolean need_bugfix = System.getProperty("elkfed.BuggyOntonotes", "no").matches("y|yes|true");
        List<Tag> names_stack = new ArrayList<Tag>();
        // Maps coref chain IDs to dense integer set ids; index 0 is a dummy.
        Alphabet<String> sets = new Alphabet<String>();
        sets.lookupIndex("*DUMMY*");
        int sent_id = 0;
        Tag sentence_tag = null;
        // Three parallel readers over the same document.
        // NOTE(review): the FileReader/TreeReader below is never closed — TODO.
        OntonotesReader reader = new OntonotesReader(new File(fname + ".coref"));
        OntonotesReader readerNE = new OntonotesReader(new File(fname + ".name"));
        TreeReader tr = new PennTreeReader(new FileReader(fname + ".parse"), new LabeledScoredTreeFactory(),
                new BobChrisTreeNormalizer());
        Tree tree = null;
        int eventType = reader.getNextEvent();
        boolean in_text = false;
        do {
            if (eventType == OntonotesReader.START_TAG && "COREF".equals(reader.getName())) {
                // Open a coref mention; only IDENT-type corefs get a set id.
                Tag t;
                if (need_bugfix) {
                    t = buggy_push_tag("coref", tag_stack);
                } else {
                    t = push_tag("coref");
                }
                if ("IDENT".equals(reader.getAttribute("TYPE"))) {
                    t.attrs.put("coref_set", "set_" + sets.lookupIndex(reader.getAttribute("ID")));
                }
                had_space = true;
            } else if (eventType == OntonotesReader.END_TAG && "COREF".equals(reader.getName())) {
                // Close the mention and attach its minimal span from the parse.
                Tag t = pop_tag("coref");
                DetermineMinSpan.addMinSpan(sentence_tag.start, tree, t, tokens);
                had_space = true;
            } else if (in_text && eventType == OntonotesReader.TOKEN) {
                if (!reader.isTrace()) {
                    // process up to the next token in the names part
                    int names_event = readerNE.getNextEvent();
                    while (names_event != OntonotesReader.TOKEN) {
                        if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) {
                            Tag t = push_tag("enamex", names_stack);
                            t.attrs.put("tag", readerNE.getAttribute("TYPE"));
                        } else if (names_event == OntonotesReader.END_TAG
                                && "ENAMEX".equals(readerNE.getName())) {
                            Tag t = pop_tag("enamex", names_stack);
                        } else {
                            throw new IllegalStateException("Unexpected event:" + names_event);
                        }
                        names_event = readerNE.getNextEvent();
                    }
                    // Both views must agree on the current token.
                    assert (reader.getToken().equals(readerNE.getToken()));
                    // Un-escape PTB bracket tokens back to literal brackets.
                    String tok = reader.getToken();
                    if (tok.equals("-LRB-"))
                        tok = "(";
                    if (tok.equals("-RRB-"))
                        tok = ")";
                    if (tok.equals("-LSB-"))
                        tok = "[";
                    if (tok.equals("-RSB-"))
                        tok = "]";
                    if (tok.equals("-LCB-"))
                        tok = "{";
                    if (tok.equals("-RCB-"))
                        tok = "}";
                    add_token(tok);
                }
            } else if (in_text && eventType == OntonotesReader.NEWLINE) {
                //System.out.println("sentence break");
                // Finish the previous sentence: record its span, its parse tag,
                // and the per-token parse info.
                if (sentence_tag != null) {
                    sentence_tag.end = tokens.size() - 1;
                    if (sentence_tag.end >= sentence_tag.start) {
                        tags.add(sentence_tag);
                        if (tree != null) {
                            Tag parse_tag = new Tag();
                            parse_tag.tag = "parse";
                            parse_tag.start = sentence_tag.start;
                            parse_tag.end = sentence_tag.end;
                            parse_tag.attrs.put("tag", tree.toString());
                            tags.add(parse_tag);
                            // Sanity check: sentence token count matches tree yield.
                            assert sentence_tag.end - sentence_tag.start + 1 == tree.yield().size() : String
                                    .format("%s / %s", tokens.subList(sentence_tag.start, sentence_tag.end + 1),
                                            tree.yield());
                            addParseInfo(sentence_tag.start, tree);
                        }
                    }
                }
                // process up to end of sentence in names annotation
                int names_event = readerNE.getNextEvent();
                while (names_event != OntonotesReader.NEWLINE) {
                    if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) {
                        Tag t = push_tag("enamex", names_stack);
                        t.attrs.put("tag", readerNE.getAttribute("TYPE"));
                    } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) {
                        Tag t = pop_tag("enamex", names_stack);
                    } else if (names_event == OntonotesReader.END_TAG && "DOC".equals(readerNE.getName())) {
                        // ignore
                    } else {
                        throw new IllegalStateException(
                                "Unexpected event:" + readerNE.describeEvent(names_event));
                    }
                    names_event = readerNE.getNextEvent();
                }
                // prepare new parse and sentence
                sentence_tag = new Tag();
                sentence_tag.start = tokens.size();
                sentence_tag.tag = "sentence";
                sentence_tag.attrs.put("orderid", "" + sent_id++);
                tree = tr.readTree();
            } else if (eventType == OntonotesReader.END_TAG && "DOCNO".equals(reader.getName())) {
                in_text = true;
                // go to the end of the DOCNO part in name doc
                // NOTE(review): the exit condition checks reader.getName(), but the
                // events being consumed come from readerNE; since reader is parked on
                // END_TAG "DOCNO", this loop effectively stops at the FIRST END_TAG in
                // the names file. Probably meant readerNE.getName() — confirm.
                int names_event = readerNE.getNextEvent();
                while (names_event != OntonotesReader.END_TAG || !"DOCNO".equals(reader.getName())) {
                    names_event = readerNE.getNextEvent();
                }
            } else if (eventType == OntonotesReader.START_TAG && "TURN".equals(reader.getName())) {
                // TURN markers must appear in both files; skip the parse for the turn.
                int names_event = readerNE.getNextEvent();
                if (names_event != OntonotesReader.START_TAG || !"TURN".equals(readerNE.getName())) {
                    throw new UnsupportedOperationException("TURN in coref but not in names");
                }
                // parse level seems to be inconsistent... so don't check here :-|
                System.err.println("TURN parse:" + tree.toString());
                tree = tr.readTree();
                eventType = reader.getNextEvent();
                names_event = readerNE.getNextEvent();
                if (eventType != OntonotesReader.NEWLINE || names_event != OntonotesReader.NEWLINE) {
                    throw new UnsupportedOperationException("No Newline after TURN");
                }
            }
            eventType = reader.getNextEvent();
        } while (eventType != OntonotesReader.END_DOCUMENT);
        return create();
    } catch (IOException ex) {
        throw new RuntimeException("Cannot read file", ex);
    }

}