Example usage for edu.stanford.nlp.parser.lexparser Lexicon UNKNOWN_WORD

List of usage examples for edu.stanford.nlp.parser.lexparser Lexicon UNKNOWN_WORD

Introduction

On this page you can find example usage for edu.stanford.nlp.parser.lexparser Lexicon UNKNOWN_WORD.

Prototype

String UNKNOWN_WORD

To view the source code for edu.stanford.nlp.parser.lexparser Lexicon UNKNOWN_WORD, click the Source Link.

Usage

From source file: conditionalCFG.ConditionalCFGParser.java

License: Open Source License

/**
 * Runs the PCFG chart parse over {@code sentence}.
 *
 * <p>Each word is mapped to its {@code wordIndex} int (words absent from the index are looked
 * up as {@link Lexicon#UNKNOWN_WORD}), character offsets and the original labels/tags are
 * recorded, the score and extent arrays are reset, the chart is initialized and the inside
 * pass is run. When no parse is found, recovery is enabled and {@code floodTags} is not yet
 * set, the method sets {@code floodTags} and re-invokes itself once on the same sentence.
 * The outside pass runs only when {@code op.doDep} is set and
 * {@code op.testOptions.useFastFactored} is not.
 *
 * @param sentence the words to parse
 * @return the result of {@code hasParse()} after the inside pass, or the result of the
 *     recovery attempt when one is made
 */
public boolean parse(List<? extends HasWord> sentence) {
    if (tf == null) {
        tf = new LabeledScoredTreeFactory();
    }
    // Clear lr up front: a NullPointerException later is preferable to a silent error.
    lr = null;
    // A different sentence object means a fresh attempt, so the recovery flag is cleared.
    if (this.sentence != sentence) {
        floodTags = false;
        this.sentence = sentence;
    }
    if (op.testOptions.verbose) {
        Timing.tick("Starting pcfg parse.");
    }
    if (spillGuts) {
        tick("Starting PCFG parse...");
    }

    length = sentence.size();
    if (arraySize < length) {
        considerCreatingArrays(length);
    }
    final int goalState = stateIndex.indexOf(goalStr);
    if (op.testOptions.verbose) {
        System.err.print("Initializing PCFG...");
    }

    // Per-token bookkeeping: word ids, character offsets, and the original labels/tags.
    words = new int[length];
    beginOffsets = new int[length];
    endOffsets = new int[length];
    originalCoreLabels = new CoreLabel[length];
    originalTags = new HasTag[length];
    int unknownCount = 0;
    final StringBuilder unknownReport = new StringBuilder("[");
    for (int pos = 0; pos < length; pos++) {
        final HasWord token = sentence.get(pos);
        final String text = token.word();

        if (token instanceof HasOffset) {
            final HasOffset spanned = (HasOffset) token;
            beginOffsets[pos] = spanned.beginPosition();
            endOffsets[pos] = spanned.endPosition();
        } else {
            // No offsets supplied: synthesize them as if words were separated by one space.
            final int begin;
            if (pos == 0) {
                begin = 0;
            } else {
                begin = endOffsets[pos - 1] + 1;
            }
            beginOffsets[pos] = begin;
            endOffsets[pos] = begin + text.length();
        }

        if (token instanceof CoreLabel) {
            originalCoreLabels[pos] = (CoreLabel) token;
        }
        if (token instanceof HasTag) {
            originalTags[pos] = (HasTag) token;
        }

        // Verbose mode only: tally words that are not indexed or not known to the lexicon,
        // listing the Character.getType code of every character for the report.
        if (op.testOptions.verbose
                && !(wordIndex.contains(text) && lex.isKnown(wordIndex.indexOf(text)))) {
            unknownCount++;
            unknownReport.append(' ').append(text).append(" { ");
            for (char ch : text.toCharArray()) {
                unknownReport.append(Character.getType(ch)).append(" ");
            }
            unknownReport.append("}");
        }

        // Words missing from the index are looked up under the unknown-word token instead;
        // no new entry is added to the index here.
        final String lookupKey = wordIndex.contains(text) ? text : Lexicon.UNKNOWN_WORD;
        words[pos] = wordIndex.indexOf(lookupKey);
    }

    // Reset the score arrays for every span [left, right) of the sentence.
    if (spillGuts) {
        tick("Wiping arrays...");
    }
    for (int left = 0; left < length; left++) {
        for (int right = left + 1; right <= length; right++) {
            Arrays.fill(iScore[left][right], Float.NEGATIVE_INFINITY);
            if (op.doDep && !op.testOptions.useFastFactored) {
                Arrays.fill(oScore[left][right], Float.NEGATIVE_INFINITY);
            }
            if (op.testOptions.lengthNormalization) {
                Arrays.fill(wordsInSpan[left][right], 1);
            }
        }
    }
    // Reset the extent tables: -1 for narrow-left / wide-right, length + 1 for
    // wide-left / narrow-right. Left extents cover positions 0..length inclusive.
    for (int endPos = 0; endPos <= length; endPos++) {
        Arrays.fill(narrowLExtent[endPos], -1);
        Arrays.fill(wideLExtent[endPos], length + 1);
    }
    // Right extents cover positions 0..length-1.
    for (int startPos = 0; startPos < length; startPos++) {
        Arrays.fill(narrowRExtent[startPos], length + 1);
        Arrays.fill(wideRExtent[startPos], -1);
    }
    if (op.testOptions.verbose) {
        Timing.tick("done.");
        unknownReport.append(" ]");
        op.tlpParams.pw(System.err).println("Unknown words: " + unknownCount + " " + unknownReport);
        System.err.print("Starting filters...");
    }

    // Seed the chart from the sentence.
    if (spillGuts) {
        tick("Tagging...");
    }
    initializeChart(sentence);
    if (op.testOptions.verbose) {
        Timing.tick("done.");
        System.err.print("Starting insides...");
    }

    // Inside pass.
    doInsideScores();
    if (op.testOptions.verbose) {
        Timing.tick("done.");
        System.out.println(
                "PCFG parsing " + length + " words (incl. stop): insideScore = " + iScore[0][length][goalState]);
    }
    bestScore = iScore[0][length][goalState];
    final boolean parsed = hasParse();

    // Recovery: flag the retry first so the nested call cannot trigger yet another one.
    if (op.testOptions.doRecovery && !parsed && !floodTags) {
        floodTags = true;
        return parse(sentence);
    }

    final boolean outsidePassNeeded = op.doDep && !op.testOptions.useFastFactored;
    if (!outsidePassNeeded) {
        return parsed;
    }

    // Outside pass, seeded with score 0 for the goal state over the whole sentence.
    if (op.testOptions.verbose) {
        System.err.print("Starting outsides...");
    }
    oScore[0][length][goalState] = 0.0f;
    doOutsideScores();
    if (op.testOptions.verbose) {
        Timing.tick("done.");
    }

    if (op.doDep) {
        initializePossibles();
    }

    return parsed;
}