Example usage of edu.stanford.nlp.trees.Tree.score()

List of usage examples for edu.stanford.nlp.trees Tree score

Introduction

This page collects example usages of the score() method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
public double score() 

Source Link

Document

Returns the score associated with the current node, or NaN if there is no score.

Usage

From source file:ErrorCorrection.java

/**
 * Corrects commonly confused words in a text file, one line at a time.
 *
 * <p>For every non-empty input line a set of candidate sentences is built by swapping
 * single occurrences of confusable words (to/too, its/it's, lose/loose, their/they're,
 * your/you're), in that order, each pass also operating on the candidates produced by the
 * earlier passes. Every candidate is parsed and the one whose parse tree has the highest
 * {@code score()} is appended to the output file. Empty lines are copied through unchanged.
 *
 * @param args {@code args[0]} is the input file path, {@code args[1]} is the output file
 *             path (opened in append mode)
 * @throws IOException if the input cannot be read or the output cannot be written
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        throw new IllegalArgumentException("usage: ErrorCorrection <input-file> <output-file>");
    }

    // Confusable pairs, listed in both directions, in the order the swaps are applied.
    // patterns[k] is replaced by replacements[k] (capitalised when the match is capitalised).
    Pattern[] patterns = {
            Pattern.compile("\\bto\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\btoo\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\bits\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\bit\\s*'s\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\blose\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\bloose\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\btheir\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\bthey\\s*'re\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\byour\\b", Pattern.CASE_INSENSITIVE),
            Pattern.compile("\\byou\\s*'re\\b", Pattern.CASE_INSENSITIVE) };
    String[] replacements = { "too", "to", "it's", "its", "loose", "lose", "they're", "their", "you're",
            "your" };

    // Read the whole input up front; the reader is closed even if reading fails.
    List<String> lines = new ArrayList<String>();
    try (BufferedReader in = new BufferedReader(new FileReader(args[0]))) {
        String str;
        while ((str = in.readLine()) != null) {
            lines.add(str);
        }
    }

    try (PrintWriter out = new PrintWriter(new FileWriter(args[1], true))) {
        for (String line : lines) {
            if (line.length() == 0) {
                out.println(line);
                continue;
            }

            HashSet<String> candidates = new HashSet<String>();
            candidates.add(line);
            for (int k = 0; k < patterns.length; k++) {
                // Variants are collected first and merged afterwards so the set is never
                // modified while it is being iterated.
                candidates.addAll(swapEachMatch(candidates, patterns[k], replacements[k]));
            }

            out.println(pickBestScoring(candidates));
        }

        // PrintWriter never throws on write errors; surface them instead of losing output silently.
        if (out.checkError()) {
            throw new IOException("failed writing to " + args[1]);
        }
    }
}

/**
 * Builds one variant per match: for every occurrence of {@code pattern} in every candidate,
 * a copy of that candidate with only that occurrence replaced.
 *
 * @param candidates  sentences to scan; not modified
 * @param pattern     the word to look for
 * @param replacement lower-case replacement; its first letter is upper-cased when the
 *                    matched text starts with an upper-case letter
 * @return the generated variants (possibly empty)
 */
private static List<String> swapEachMatch(Iterable<String> candidates, Pattern pattern, String replacement) {
    String capitalized = Character.toUpperCase(replacement.charAt(0)) + replacement.substring(1);
    List<String> variants = new ArrayList<String>();
    for (String candidate : candidates) {
        Matcher m = pattern.matcher(candidate);
        while (m.find()) {
            String word = Character.isUpperCase(candidate.charAt(m.start())) ? capitalized : replacement;
            variants.add(candidate.substring(0, m.start()) + word + candidate.substring(m.end()));
        }
    }
    return variants;
}

/**
 * Parses every candidate and returns the one whose tree has the highest score.
 *
 * <p>{@code Tree.score()} returns NaN when a tree has no score. A NaN score never beats a
 * real score, and a real score always replaces a NaN one, so an unscored first candidate
 * no longer blocks every later candidate from being selected.
 *
 * @param candidates non-empty collection of sentences
 * @return the best-scoring sentence, or the first one visited if none has a score
 */
private static String pickBestScoring(Iterable<String> candidates) {
    String bestLine = null;
    double best = Double.NaN;
    for (String candidate : candidates) {
        Tree tr = parse(candidate);
        double score = tr.score();
        boolean first = bestLine == null;
        boolean replacesUnscored = Double.isNaN(best) && !Double.isNaN(score);
        if (first || replacesUnscored || score > best) {
            best = score;
            bestLine = candidate;
        }
    }
    return bestLine;
}

From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java

License:Open Source License

/**
 * Splits raw, untokenized text into sentences, parses each sentence, and returns one
 * {@code ParseTree} per sentence holding the rendered tree string and the tree's score.
 *
 * @param text         raw text; sentence splitting and tokenization are done here
 * @param outputFormat output options passed to {@code ParserUtil.setOptions}
 * @return the parse results in sentence order (empty if no sentence was found)
 * @throws TApplicationException with type {@code INTERNAL_ERROR} if anything fails; the
 *                               original exception is attached as its cause
 */
public List<ParseTree> parse_text(String text, List<String> outputFormat) throws TApplicationException {
    List<ParseTree> results = new ArrayList<ParseTree>();

    try {
        // NOTE(review): treePrinter is not declared in this method, so it is shared state;
        // confirm that concurrent calls cannot interfere with each other.
        treePrinter = ParserUtil.setOptions(outputFormat, tlp);

        // assume no tokenization was done; use Stanford's default org.ets.research.nlp.stanford_thrift.tokenizer
        DocumentPreprocessor preprocess = new DocumentPreprocessor(new StringReader(text));
        for (Iterator<List<HasWord>> sentences = preprocess.iterator(); sentences.hasNext();) {
            Tree parseTree = parser.apply(sentences.next());
            String rendered = ParserUtil.TreeObjectToString(parseTree, treePrinter);
            results.add(new ParseTree(rendered, parseTree.score()));
        }
    } catch (Exception e) {
        // FIXME (kept from original): broad catch. Every failure is reported as INTERNAL_ERROR
        // with the same message as before; the cause is now attached so the stack trace of the
        // real failure is not lost.
        TApplicationException failure = new TApplicationException(TApplicationException.INTERNAL_ERROR,
                e.getMessage());
        failure.initCause(e);
        throw failure;
    }

    return results;
}

From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java

License:Open Source License

/**
 * @param tokens One sentence worth of tokens at a time.
 * @return A ParseTree object of the String representation of the tree, plus its probability.
 * @throws TApplicationException/*from www  .  ja  v  a  2s .co  m*/
 */
public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException {
    try {
        treePrinter = ParserUtil.setOptions(outputFormat, tlp);

        // a single sentence worth of tokens
        String[] tokenArray = new String[tokens.size()];
        tokens.toArray(tokenArray);
        List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(tokenArray);
        Tree parseTree = parser.apply(crazyStanfordFormat);
        return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score());
    } catch (Exception e) {
        // FIXME
        throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage());
    }
}

From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java

License:Open Source License

/**
 * Parses a single sentence that is already tokenized and part-of-speech tagged.
 *
 * @param taggedSentence one sentence of tagged text
 * @param outputFormat   output options passed to {@code ParserUtil.setOptions}
 * @param divider        passed through to
 *                       {@code CoreNLPThriftUtil.getListOfTaggedWordsFromTaggedSentence};
 *                       presumably the separator between a word and its tag (confirm
 *                       against that helper)
 * @return a ParseTree holding the rendered tree string and the tree's score
 * @throws TApplicationException with type {@code INTERNAL_ERROR} if anything fails; the
 *                               original exception is attached as its cause
 */
public ParseTree parse_tagged_sentence(String taggedSentence, List<String> outputFormat, String divider)
        throws TApplicationException {
    try {
        // NOTE(review): treePrinter is not declared in this method, so it is shared state;
        // confirm that concurrent calls cannot interfere with each other.
        treePrinter = ParserUtil.setOptions(outputFormat, tlp);

        // a single sentence worth of tagged text, better be properly tokenized >:D
        Tree parseTree = parser
                .apply(CoreNLPThriftUtil.getListOfTaggedWordsFromTaggedSentence(taggedSentence, divider));
        String rendered = ParserUtil.TreeObjectToString(parseTree, treePrinter);
        return new ParseTree(rendered, parseTree.score());
    } catch (Exception e) {
        // FIXME (kept from original): broad catch. Every failure is reported as INTERNAL_ERROR
        // with the same message as before; the cause is now attached so the stack trace of the
        // real failure is not lost.
        TApplicationException failure = new TApplicationException(TApplicationException.INTERNAL_ERROR,
                e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}

From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordSRParserThrift.java

License:Open Source License

/**
 * Parses one sentence supplied as tagged words and wraps the outcome in a ParseTree.
 *
 * @param taggedWords  the tagged words handed to {@code model.apply}
 * @param outputFormat output options passed to {@code ParserUtil.setOptions}
 * @return a ParseTree built from the rendered tree string and the value of {@code score()}
 * @throws Exception propagated unchanged from the calls made here
 */
public ParseTree parseTaggedWords(List<TaggedWord> taggedWords, List<String> outputFormat) throws Exception {
    // The printer is configured before parsing, as in the original call order.
    final TreePrint printer = ParserUtil.setOptions(outputFormat, tlp);
    final Tree parsed = model.apply(taggedWords);

    final String rendered = ParserUtil.TreeObjectToString(parsed, printer);
    // TODO: Do these parse trees have scores, like the lexicalized ones do?
    final double treeScore = parsed.score();
    return new ParseTree(rendered, treeScore);
}