List of usage examples for the edu.stanford.nlp.trees.LabeledScoredTreeFactory no-argument constructor
public LabeledScoredTreeFactory()
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public boolean parse(List<? extends HasWord> sentence) { if (tf == null) { tf = new LabeledScoredTreeFactory(); }//from w w w . j ava 2 s .c o m lr = null; // better nullPointer exception than silent error //System.out.println("is it a taggedword?" + (sentence.get(0) instanceof TaggedWord)); //debugging if (sentence != this.sentence) { this.sentence = sentence; floodTags = false; } if (op.testOptions.verbose) { Timing.tick("Starting pcfg parse."); } if (spillGuts) { tick("Starting PCFG parse..."); } length = sentence.size(); if (length > arraySize) { considerCreatingArrays(length); } int goal = stateIndex.indexOf(goalStr); if (op.testOptions.verbose) { // System.out.println(numStates + " states, " + goal + " is the goal state."); // System.err.println(new ArrayList(ug.coreRules.keySet())); System.err.print("Initializing PCFG..."); } // map input words to words array (wordIndex ints) words = new int[length]; beginOffsets = new int[length]; endOffsets = new int[length]; originalCoreLabels = new CoreLabel[length]; originalTags = new HasTag[length]; int unk = 0; StringBuilder unkWords = new StringBuilder("["); // int unkIndex = wordIndex.size(); for (int i = 0; i < length; i++) { String s = sentence.get(i).word(); if (sentence.get(i) instanceof HasOffset) { HasOffset word = (HasOffset) sentence.get(i); beginOffsets[i] = word.beginPosition(); endOffsets[i] = word.endPosition(); } else { //Storing the positions of the word interstices //Account for single space between words beginOffsets[i] = ((i == 0) ? 
0 : endOffsets[i - 1] + 1); endOffsets[i] = beginOffsets[i] + s.length(); } if (sentence.get(i) instanceof CoreLabel) { originalCoreLabels[i] = (CoreLabel) sentence.get(i); } if (sentence.get(i) instanceof HasTag) { originalTags[i] = (HasTag) sentence.get(i); } if (op.testOptions.verbose && (!wordIndex.contains(s) || !lex.isKnown(wordIndex.indexOf(s)))) { unk++; unkWords.append(' '); unkWords.append(s); unkWords.append(" { "); for (int jj = 0; jj < s.length(); jj++) { char ch = s.charAt(jj); unkWords.append(Character.getType(ch)).append(" "); } unkWords.append("}"); } // TODO: really, add a new word? //words[i] = wordIndex.indexOf(s, unkIndex); //if (words[i] == unkIndex) { // ++unkIndex; //} //words[i] = wordIndex.indexOf(s, true); if (wordIndex.contains(s)) { words[i] = wordIndex.indexOf(s); } else { words[i] = wordIndex.indexOf(Lexicon.UNKNOWN_WORD); } } // initialize inside and outside score arrays if (spillGuts) { tick("Wiping arrays..."); } for (int start = 0; start < length; start++) { for (int end = start + 1; end <= length; end++) { Arrays.fill(iScore[start][end], Float.NEGATIVE_INFINITY); if (op.doDep && !op.testOptions.useFastFactored) { Arrays.fill(oScore[start][end], Float.NEGATIVE_INFINITY); } if (op.testOptions.lengthNormalization) { Arrays.fill(wordsInSpan[start][end], 1); } } } for (int loc = 0; loc <= length; loc++) { Arrays.fill(narrowLExtent[loc], -1); // the rightmost left with state s ending at i that we can get is the beginning Arrays.fill(wideLExtent[loc], length + 1); // the leftmost left with state s ending at i that we can get is the end } for (int loc = 0; loc < length; loc++) { Arrays.fill(narrowRExtent[loc], length + 1); // the leftmost right with state s starting at i that we can get is the end Arrays.fill(wideRExtent[loc], -1); // the rightmost right with state s starting at i that we can get is the beginning } // int puncTag = stateIndex.indexOf("."); // boolean lastIsPunc = false; if (op.testOptions.verbose) { Timing.tick("done."); 
unkWords.append(" ]"); op.tlpParams.pw(System.err).println("Unknown words: " + unk + " " + unkWords); System.err.print("Starting filters..."); } // do tags if (spillGuts) { tick("Tagging..."); } initializeChart(sentence); //if (op.testOptions.outsideFilter) // buildOFilter(); if (op.testOptions.verbose) { Timing.tick("done."); System.err.print("Starting insides..."); } // do the inside probabilities doInsideScores(); if (op.testOptions.verbose) { // insideTime += Timing.tick("done."); Timing.tick("done."); System.out.println( "PCFG parsing " + length + " words (incl. stop): insideScore = " + iScore[0][length][goal]); } bestScore = iScore[0][length][goal]; boolean succeeded = hasParse(); if (op.testOptions.doRecovery && !succeeded && !floodTags) { floodTags = true; // sentence will try to reparse // ms: disabled message. this is annoying and it doesn't really provide much information //System.err.println("Trying recovery parse..."); return parse(sentence); } if (!op.doDep || op.testOptions.useFastFactored) { return succeeded; } if (op.testOptions.verbose) { System.err.print("Starting outsides..."); } // outside scores oScore[0][length][goal] = 0.0f; doOutsideScores(); //System.out.println("State rate: "+((int)(1000*ohits/otries))/10.0); //System.out.println("Traversals: "+ohits); if (op.testOptions.verbose) { // outsideTime += Timing.tick("Done."); Timing.tick("done."); } if (op.doDep) { initializePossibles(); } return succeeded; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public ConditionalCFGParser(BinaryGrammar bg, UnaryGrammar ug, ConditionalLexicon lex, Options op, Index<String> stateIndex, Index<String> wordIndex, Index<String> tagIndex) { // System.out.println("ExhaustivePCFGParser constructor called."); this.bg = bg; this.ug = ug; this.lex = lex; this.op = op; this.tlp = op.langpack(); goalStr = tlp.startSymbol();/* w w w. ja va 2s.c om*/ this.stateIndex = stateIndex; this.wordIndex = wordIndex; this.tagIndex = tagIndex; tf = new LabeledScoredTreeFactory(); numStates = stateIndex.size(); isTag = new boolean[numStates]; // tag index is smaller, so we fill by iterating over the tag index // rather than over the state index for (String tag : tagIndex.objectsList()) { int state = stateIndex.indexOf(tag); if (state < 0) { continue; } isTag[state] = true; } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/** * Reads in a Penn Treebank-style String and returns a tree. * // www . j a v a 2s . c o m * @param pennString * A Penn Treebank-style String as produced by the StandfordParser * @return a tree representation of the PennString (LabeledScoredTree) */ public static Tree pennString2Tree(String pennString) { TreeReader tr = null; try { tr = new PennTreeReader(new StringReader(pennString), new LabeledScoredTreeFactory()); return tr.readTree(); } catch (IOException e) { throw new IllegalStateException(e); } finally { closeQuietly(tr); } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
private AnalysisUtilities() { parser = null;/*from w w w. j ava 2 s . c o m*/ conjugator = new VerbConjugator(); conjugator.load(GlobalProperties.getProperties().getProperty("verbConjugationsFile", "config" + File.separator + "verbConjugations.txt")); headfinder = new CollinsHeadFinder(); tree_factory = new LabeledScoredTreeFactory(); tlp = new PennTreebankLanguagePack(); }
From source file:edu.cmu.ark.nlp.question.QuestionUtil.java
License:Open Source License
public static LabeledScoredTreeFactory getTreeFactory() { if (tree_factory == null) { tree_factory = new LabeledScoredTreeFactory(); }/* w w w . jav a 2s.c om*/ return tree_factory; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
public SentenceSimplifier(Properties props) { factory = new LabeledScoredTreeFactory(); this.hf = new CollinsHeadFinder(); String computefeatures = props.getProperty("getComputeFeatures", "true"); if (computefeatures.equals("true")) this.getComputeFeatures = true; else/*from www.j av a2 s .c o m*/ this.getComputeFeatures = false; this.props = props; conjugator = new VerbConjugator(props); }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
public SentenceSimplifier() { factory = new LabeledScoredTreeFactory(); }
From source file:edu.jhu.agiga.StanfordAgigaSentence.java
License:Open Source License
public Tree getStanfordContituencyTree() { TreeFactory tf = new LabeledScoredTreeFactory(); StringReader r = new StringReader(getParseText()); TreeReader tr = new PennTreeReader(r, tf); try {//from www . ja v a2 s.c o m return tr.readTree(); } catch (IOException e) { throw new RuntimeException("Error: IOException should not be thrown by StringReader"); } }
From source file:elkfed.expletives.TrainingData.java
License:Apache License
public static void extractExamples(String file, Set<String> anaphoricPronouns, List<ExpletiveInstance> instances) throws FileNotFoundException, IOException { TreeReader tr = new PennTreeReader(new FileReader(file), new LabeledScoredTreeFactory(), new BobChrisTreeNormalizer()); Tree t;//from ww w. ja va 2 s . c o m String file_id = file.substring(file.length() - 8, file.length() - 4); int sent_idx = 1; while ((t = tr.readTree()) != null) { //t.pennPrint(); int word_idx = 1; for (Tree t1 : t.getLeaves()) { String s = t1.toString(); if ("it".equals(s) || "It".equals(s)) { String id = String.format("%s:S%d:%d-%d", file_id, sent_idx, word_idx, word_idx); ExpletiveInstance inst = new ExpletiveInstance(t, t1, id); boolean is_positive = anaphoricPronouns.contains(id); inst.setFeature(PairInstance.FD_POSITIVE, !is_positive); instances.add(inst); String cls = is_positive ? "+1" : "-1"; System.out.format("%s\t%s\t(%s)\n", s, id, cls); } word_idx++; } //System.out.println(); //System.out.println(t); sent_idx++; } }
From source file:elkfed.mmax.importer.ImportOntonotes.java
License:Apache License
public MiniDiscourse importFile(String fname) { try {//from www .j ava 2s. c o m boolean had_space = true; boolean need_bugfix = System.getProperty("elkfed.BuggyOntonotes", "no").matches("y|yes|true"); List<Tag> names_stack = new ArrayList<Tag>(); Alphabet<String> sets = new Alphabet<String>(); sets.lookupIndex("*DUMMY*"); int sent_id = 0; Tag sentence_tag = null; OntonotesReader reader = new OntonotesReader(new File(fname + ".coref")); OntonotesReader readerNE = new OntonotesReader(new File(fname + ".name")); TreeReader tr = new PennTreeReader(new FileReader(fname + ".parse"), new LabeledScoredTreeFactory(), new BobChrisTreeNormalizer()); Tree tree = null; int eventType = reader.getNextEvent(); boolean in_text = false; do { if (eventType == OntonotesReader.START_TAG && "COREF".equals(reader.getName())) { Tag t; if (need_bugfix) { t = buggy_push_tag("coref", tag_stack); } else { t = push_tag("coref"); } if ("IDENT".equals(reader.getAttribute("TYPE"))) { t.attrs.put("coref_set", "set_" + sets.lookupIndex(reader.getAttribute("ID"))); } had_space = true; } else if (eventType == OntonotesReader.END_TAG && "COREF".equals(reader.getName())) { Tag t = pop_tag("coref"); DetermineMinSpan.addMinSpan(sentence_tag.start, tree, t, tokens); had_space = true; } else if (in_text && eventType == OntonotesReader.TOKEN) { if (!reader.isTrace()) { // process up to the next token in the names part int names_event = readerNE.getNextEvent(); while (names_event != OntonotesReader.TOKEN) { if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = push_tag("enamex", names_stack); t.attrs.put("tag", readerNE.getAttribute("TYPE")); } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = pop_tag("enamex", names_stack); } else { throw new IllegalStateException("Unexpected event:" + names_event); } names_event = readerNE.getNextEvent(); } assert (reader.getToken().equals(readerNE.getToken())); String tok = 
reader.getToken(); if (tok.equals("-LRB-")) tok = "("; if (tok.equals("-RRB-")) tok = ")"; if (tok.equals("-LSB-")) tok = "["; if (tok.equals("-RSB-")) tok = "]"; if (tok.equals("-LCB-")) tok = "{"; if (tok.equals("-RCB-")) tok = "}"; add_token(tok); } } else if (in_text && eventType == OntonotesReader.NEWLINE) { //System.out.println("sentence break"); if (sentence_tag != null) { sentence_tag.end = tokens.size() - 1; if (sentence_tag.end >= sentence_tag.start) { tags.add(sentence_tag); if (tree != null) { Tag parse_tag = new Tag(); parse_tag.tag = "parse"; parse_tag.start = sentence_tag.start; parse_tag.end = sentence_tag.end; parse_tag.attrs.put("tag", tree.toString()); tags.add(parse_tag); assert sentence_tag.end - sentence_tag.start + 1 == tree.yield().size() : String .format("%s / %s", tokens.subList(sentence_tag.start, sentence_tag.end + 1), tree.yield()); addParseInfo(sentence_tag.start, tree); } } } // process up to end of sentence in names annotation int names_event = readerNE.getNextEvent(); while (names_event != OntonotesReader.NEWLINE) { if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = push_tag("enamex", names_stack); t.attrs.put("tag", readerNE.getAttribute("TYPE")); } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) { Tag t = pop_tag("enamex", names_stack); } else if (names_event == OntonotesReader.END_TAG && "DOC".equals(readerNE.getName())) { // ignore } else { throw new IllegalStateException( "Unexpected event:" + readerNE.describeEvent(names_event)); } names_event = readerNE.getNextEvent(); } // prepare new parse and sentence sentence_tag = new Tag(); sentence_tag.start = tokens.size(); sentence_tag.tag = "sentence"; sentence_tag.attrs.put("orderid", "" + sent_id++); tree = tr.readTree(); } else if (eventType == OntonotesReader.END_TAG && "DOCNO".equals(reader.getName())) { in_text = true; // go to the end of the DOCNO part in name doc int names_event = 
readerNE.getNextEvent(); while (names_event != OntonotesReader.END_TAG || !"DOCNO".equals(reader.getName())) { names_event = readerNE.getNextEvent(); } } else if (eventType == OntonotesReader.START_TAG && "TURN".equals(reader.getName())) { int names_event = readerNE.getNextEvent(); if (names_event != OntonotesReader.START_TAG || !"TURN".equals(readerNE.getName())) { throw new UnsupportedOperationException("TURN in coref but not in names"); } // parse level seems to be inconsistent... so don't check here :-| System.err.println("TURN parse:" + tree.toString()); tree = tr.readTree(); eventType = reader.getNextEvent(); names_event = readerNE.getNextEvent(); if (eventType != OntonotesReader.NEWLINE || names_event != OntonotesReader.NEWLINE) { throw new UnsupportedOperationException("No Newline after TURN"); } } eventType = reader.getNextEvent(); } while (eventType != OntonotesReader.END_DOCUMENT); return create(); } catch (IOException ex) { throw new RuntimeException("Cannot read file", ex); } }