List of usage examples for edu.stanford.nlp.trees.Tree.score()
@Override public double score()
From source file:ErrorCorrection.java
public static void main(String[] args) throws IOException { // TODO code application logic here File inputFile = new File(args[0]); FileWriter outFile = new FileWriter(args[1], true); Pattern p_to = Pattern.compile("\\bto\\b", Pattern.CASE_INSENSITIVE); Pattern p_too = Pattern.compile("\\btoo\\b", Pattern.CASE_INSENSITIVE); Pattern p_its = Pattern.compile("\\bits\\b", Pattern.CASE_INSENSITIVE); Pattern p_it_s = Pattern.compile("\\bit\\s*'s\\b", Pattern.CASE_INSENSITIVE); Pattern p_lose = Pattern.compile("\\blose\\b", Pattern.CASE_INSENSITIVE); Pattern p_loose = Pattern.compile("\\bloose\\b", Pattern.CASE_INSENSITIVE); Pattern p_their = Pattern.compile("\\btheir\\b", Pattern.CASE_INSENSITIVE); Pattern p_theyre = Pattern.compile("\\bthey\\s*'re\\b", Pattern.CASE_INSENSITIVE); Pattern p_your = Pattern.compile("\\byour\\b", Pattern.CASE_INSENSITIVE); Pattern p_youre = Pattern.compile("\\byou\\s*'re\\b", Pattern.CASE_INSENSITIVE); BufferedReader in = new BufferedReader(new FileReader(inputFile)); PrintWriter out = new PrintWriter(outFile); String str;//from w w w. j a va 2 s . 
c om HashSet<String> generatedStrings = new HashSet(); List<String> list = new ArrayList<String>(); while ((str = in.readLine()) != null) { list.add(str); } for (String l : list) { if (l.length() != 0) { generatedStrings = new HashSet(); generatedStrings.add(l); List<String> new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_to.matcher(l); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "Too"; } else { word = "too"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_too.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "To"; } else { word = "to"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_its.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "It's"; } else { word = "it's"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_it_s.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "Its"; } else { word = "its"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); 
new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_lose.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "Loose"; } else { word = "loose"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_loose.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "Lose"; } else { word = "lose"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_their.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "They're"; } else { word = "they're"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_theyre.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "Their"; } else { word = "their"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl 
: generatedStrings) { Matcher m = p_your.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "You're"; } else { word = "you're"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); new_strings = new ArrayList(); for (String gl : generatedStrings) { Matcher m = p_youre.matcher(gl); int begin = 0; int end = 0; String word = ""; while (m.find()) { end = m.start(); char fc = m.group().charAt(0); if (Character.isUpperCase(fc)) { word = "Your"; } else { word = "your"; } String new_string = gl.substring(begin, end) + word + gl.substring(m.end(), gl.length()); new_strings.add(new_string); } } generatedStrings.addAll(new_strings); double score = 0.0; double best = 0.0; String best_line = ""; int i = 0; for (String gl : generatedStrings) { Tree tr = parse(gl); score = tr.score(); if (i == 0) { best = score; best_line = gl; } else { if (best < score) { best = score; best_line = gl; } } i++; } out.println(best_line); } else { out.println(l); } } out.close(); }
From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java
License:Open Source License
public List<ParseTree> parse_text(String text, List<String> outputFormat) throws TApplicationException { List<ParseTree> results = new ArrayList<ParseTree>(); try {/*from w ww. j av a 2s . co m*/ treePrinter = ParserUtil.setOptions(outputFormat, tlp); // assume no tokenization was done; use Stanford's default org.ets.research.nlp.stanford_thrift.tokenizer DocumentPreprocessor preprocess = new DocumentPreprocessor(new StringReader(text)); Iterator<List<HasWord>> foundSentences = preprocess.iterator(); while (foundSentences.hasNext()) { Tree parseTree = parser.apply(foundSentences.next()); results.add( new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score())); } } catch (Exception e) { // FIXME throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage()); } return results; }
From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java
License:Open Source License
/** * @param tokens One sentence worth of tokens at a time. * @return A ParseTree object of the String representation of the tree, plus its probability. * @throws TApplicationException/*from www . ja v a 2s .co m*/ */ public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException { try { treePrinter = ParserUtil.setOptions(outputFormat, tlp); // a single sentence worth of tokens String[] tokenArray = new String[tokens.size()]; tokens.toArray(tokenArray); List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(tokenArray); Tree parseTree = parser.apply(crazyStanfordFormat); return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score()); } catch (Exception e) { // FIXME throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage()); } }
From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordParserThrift.java
License:Open Source License
public ParseTree parse_tagged_sentence(String taggedSentence, List<String> outputFormat, String divider) throws TApplicationException { try {//from w w w.j a v a 2s .co m treePrinter = ParserUtil.setOptions(outputFormat, tlp); // a single sentence worth of tagged text, better be properly tokenized >:D Tree parseTree = parser .apply(CoreNLPThriftUtil.getListOfTaggedWordsFromTaggedSentence(taggedSentence, divider)); return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score()); } catch (Exception e) { // FIXME throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage()); } }
From source file:org.ets.research.nlp.stanford_thrift.parser.StanfordSRParserThrift.java
License:Open Source License
public ParseTree parseTaggedWords(List<TaggedWord> taggedWords, List<String> outputFormat) throws Exception { TreePrint treePrinter = ParserUtil.setOptions(outputFormat, tlp); Tree parseTree = model.apply(taggedWords); // TODO: Do these parse trees have scores, like the lexicalized ones do? return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score()); }