List of usage examples for edu.stanford.nlp.parser.lexparser.LexicalizedParser.loadModel
public static LexicalizedParser loadModel(ObjectInputStream ois)
From source file:BuildBinarizedDataset.java
/** * Turns a text file into trees for use in a RNTN classifier such as * the treebank used in the Sentiment project. * <br>/*from w w w.jav a 2s . com*/ * The expected input file is one sentence per line, with sentences * separated by blank lines. The first line has the main label of the sentence together with the full sentence. * Lines after the first sentence line but before * the blank line will be treated as labeled sub-phrases. The * labels should start with the label and then contain a list of * tokens the label applies to. All phrases that do not have their own label will take on the main sentence label! * For example: * <br> * <code> * 1 Today is not a good day.<br> * 3 good<br> * 3 good day <br> * 3 a good day <br> * <br> * (next block starts here) <br> * </code> * By default the englishPCFG parser is used. This can be changed * with the <code>-parserModel</code> flag. Specify an input file * with <code>-input</code>. * <br> * If a sentiment model is provided with -sentimentModel, that model * will be used to prelabel the sentences. Any spans with given * labels will then be used to adjust those labels. 
*/ public static void main(String[] arg) throws IOException { CollapseUnaryTransformer transformer = new CollapseUnaryTransformer(); // FileWriter writer = new FileWriter("D:\\dataset\\train.txt", true); String parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; String args[] = { "-input", "D:\\parse.txt", "-sentimentModel", "edu/stanford/nlp/models/sentiment/sentiment.ser.gz" }; String inputPath = "D:\\dataset\\good.txt"; String sentimentModelPath = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; SentimentModel sentimentModel = null; /* for (int argIndex = 0; argIndex < args.length; ) { if (args[argIndex].equalsIgnoreCase("-input")) { inputPath = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-parserModel")) { parserModel = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-sentimentModel")) { sentimentModelPath = args[argIndex + 1]; argIndex += 2; } else { System.err.println("Unknown argument " + args[argIndex]); System.exit(2); } }*/ if (inputPath == null) { throw new IllegalArgumentException("Must specify input file with -input"); } LexicalizedParser parser = LexicalizedParser.loadModel(parserModel); TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); if (sentimentModelPath != null) { sentimentModel = SentimentModel.loadSerialized(sentimentModelPath); } String text = IOUtils.slurpFileNoExceptions(inputPath); String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk for (String chunk : chunks) { if (chunk.trim().isEmpty()) { continue; } // The expected format is that line 0 will be the text of the // sentence, and each subsequence line, if any, will be a value // followed by the sequence of tokens that get that value. // Here we take the first line and tokenize it as one sentence. 
String[] lines = chunk.trim().split("\\n"); String sentence = lines[0]; StringReader sin = new StringReader(sentence); DocumentPreprocessor document = new DocumentPreprocessor(sin); document.setSentenceFinalPuncWords(new String[] { "\n" }); List<HasWord> tokens = document.iterator().next(); Integer mainLabel = new Integer(tokens.get(0).word()); //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; "); tokens = tokens.subList(1, tokens.size()); //System.err.println(tokens); Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap(); for (int i = 1; i < lines.length; ++i) { extractLabels(spanToLabels, tokens, lines[i]); } // TODO: add an option which treats the spans as constraints when parsing Tree tree = parser.apply(tokens); Tree binarized = binarizer.transformTree(tree); Tree collapsedUnary = transformer.transformTree(binarized); // if there is a sentiment model for use in prelabeling, we // label here and then use the user given labels to adjust if (sentimentModel != null) { Trees.convertToCoreLabels(collapsedUnary); SentimentCostAndGradient scorer = new SentimentCostAndGradient(sentimentModel, null); scorer.forwardPropagateTree(collapsedUnary); setPredictedLabels(collapsedUnary); } else { setUnknownLabels(collapsedUnary, mainLabel); //collapsedUnary.label().setValue(mainLabel.toString()); //System.out.println("Root"+collapsedUnary.getNodeNumber(1)); } Trees.convertToCoreLabels(collapsedUnary); collapsedUnary.indexSpans(); for (Map.Entry<Pair<Integer, Integer>, String> pairStringEntry : spanToLabels.entrySet()) { setSpanLabel(collapsedUnary, pairStringEntry.getKey(), pairStringEntry.getValue()); } String x = collapsedUnary.toString(); //x.replaceAll("\\s",""); x = x.replace("(", "["); x = x.replace(")", "]"); //writer.write(x); //writer.write("\r\n"); System.out.println(x); //System.out.println(); } //writer.close(); }
From source file:ConstituencyParse.java
License:Apache License
/**
 * Opens the output writers and loads the parsing components.
 *
 * @param tokPath    path of the token output file; no token writer is opened when {@code null}
 * @param parentPath path of the parent output file
 * @param tokenize   value stored in the {@code tokenize} field
 * @throws IOException if an output file cannot be opened for writing
 */
public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException {
    this.tokenize = tokenize;

    // The token output is optional; the parent output is always opened.
    if (tokPath != null) {
        tokWriter = new BufferedWriter(new FileWriter(tokPath));
    }
    parentWriter = new BufferedWriter(new FileWriter(parentPath));

    // Parser plus the tree transformations applied to its output.
    parser = LexicalizedParser.loadModel(PCFG_PATH);
    binarizer = TreeBinarizer.simpleTreeBinarizer(
            parser.getTLPParams().headFinder(),
            parser.treebankLanguagePack());
    transformer = new CollapseUnaryTransformer();

    // set up to produce dependency representations from constituency trees
    gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
}
From source file:Anaphora_Resolution.ParseAllXMLDocuments.java
public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException, TransformerException { // File dataFolder = new File("DataToPort"); // File[] documents; String grammar = "grammar/englishPCFG.ser.gz"; String[] options = { "-maxLength", "100", "-retainTmpSubcategories" }; //LexicalizedParser lp = new LexicalizedParser(grammar, options); LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); ///* w w w . j a v a 2 s. c o m*/ // if (dataFolder.isDirectory()) { // documents = dataFolder.listFiles(); // } else { // documents = new File[] {dataFolder}; // } // int currfile = 0; // int totfiles = documents.length; // for (File paper : documents) { // currfile++; // if (paper.getName().equals(".DS_Store")||paper.getName().equals(".xml")) { // currfile--; // totfiles--; // continue; // } // System.out.println("Working on "+paper.getName()+" (file "+currfile+" out of "+totfiles+")."); // // DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); // This is for XML // DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); // Document doc = docBuilder.parse(paper.getAbsolutePath()); // // NodeList textlist = doc.getElementsByTagName("text"); // for(int i=0; i < textlist.getLength(); i++) { // Node currentnode = textlist.item(i); // String wholetext = textlist.item(i).getTextContent(); String wholetext = "How about other changes for example the ways of doing the work and \n" + "\n" + "Iwould say security has , there 's more pressure put on people now than there used to be because obviously , especially after Locherbie , they tightened up on security and there 's a lot more pressure now especially from the ETR and stuff like that \n" + "People do n't feel valued any more , they feel I do n't know I think they feel that nobody cares about them really anyway \n"; //System.out.println(wholetext); //Iterable<List<? 
extends HasWord>> sentences; ArrayList<Tree> parseTrees = new ArrayList<Tree>(); String asd = ""; int j = 0; StringReader stringreader = new StringReader(wholetext); DocumentPreprocessor dp = new DocumentPreprocessor(stringreader); @SuppressWarnings("rawtypes") ArrayList<List> sentences = preprocess(dp); for (List sentence : sentences) { parseTrees.add(lp.apply(sentence)); // Parsing a new sentence and adding it to the parsed tree ArrayList<Tree> PronounsList = findPronouns(parseTrees.get(j)); // Locating all pronouns to resolve in the sentence Tree corefedTree; for (Tree pronounTree : PronounsList) { parseTrees.set(parseTrees.size() - 1, HobbsResolve(pronounTree, parseTrees)); // Resolving the coref and modifying the tree for each pronoun } StringWriter strwr = new StringWriter(); PrintWriter prwr = new PrintWriter(strwr); TreePrint tp = new TreePrint("penn"); tp.printTree(parseTrees.get(j), prwr); prwr.flush(); asd += strwr.toString(); j++; } String armando = ""; for (Tree sentence : parseTrees) { for (Tree leaf : Trees.leaves(sentence)) armando += leaf + " "; } System.out.println(wholetext); System.out.println(); System.out.println("......"); System.out.println(armando); System.out.println("All done."); // currentnode.setTextContent(asd); // } // TransformerFactory transformerFactory = TransformerFactory.newInstance(); // Transformer transformer = transformerFactory.newTransformer(); // DOMSource source = new DOMSource(doc); // StreamResult result = new StreamResult(paper); // transformer.transform(source, result); // // System.out.println("Done"); // } }
From source file:com.daemon.sentiment.FeatureMatrix.java
License:Open Source License
/** * Load the Stanford Parser object - depending on language - necessary to do * POS analysis. Parser object is null, if language is not supported. *///from ww w . java 2s. co m private void loadParser() { // load depending on given language the corresponding lang-model. switch (this.sourceData.getLanguage()) { case "en": lexicalizedParser = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); break; default: _logger.log("POS TAGGING is not supported for " + this.sourceData.getLanguage()); break; } }
From source file:com.epictodo.controller.nlp.NLPLoadEngine.java
License:Open Source License
public NLPLoadEngine() { this.mute();// w w w.j av a 2 s . co m Properties _properties = new Properties(); _properties.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment"); try { CLASSIFIER = CRFClassifier.getClassifierNoExceptions(CLASSIFIER_MODEL); LEXICAL_PARSER = LexicalizedParser.loadModel(ENGLISHPCFG_MODEL); _pipeline = new StanfordCoreNLP(_properties, true); _pipeline.addAnnotator(new TimeAnnotator("sutime", _properties)); _logger.log(Level.INFO, "Successfully loaded models."); } catch (RuntimeException ex) { _logger.log(Level.SEVERE, "Error loading models."); throw ex; } }
From source file:com.github.kutschkem.Qgen.QuestionRankerByParseProbability.java
License:Open Source License
/** * Load the parser from the given location within the classpath. * //from ww w .j a va2 s. c o m * @param aUrl * URL of the parser file. */ // copied from DKPro's StanfordParser private LexicalizedParser getParserDataFromSerializedFile(URL aUrl) throws IOException { ObjectInputStream in; InputStream is = null; try { is = aUrl.openStream(); if (aUrl.toString().endsWith(".gz")) { // it's faster to do the buffering _outside_ the gzipping as // here in = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(is))); } else { in = new ObjectInputStream(new BufferedInputStream(is)); } LexicalizedParser pd = LexicalizedParser.loadModel(in); // Numberer.setNumberers(pd.numbs); // will happen later in // makeParsers() in.close(); return pd; } finally { closeQuietly(is); } }
From source file:com.parse.Dependency.java
public static void main(String[] args) { LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); lp.setOptionFlags(new String[] { "-maxLength", "80", "-retainTmpSubcategories", }); String[] sent = { "This", "is", "an", "easy", "sentence", "." }; List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent); Tree parse = lp.apply(rawWords);/*from w w w .ja v a 2s . c o m*/ parse.pennPrint(); System.out.println(); TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); List<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); System.out.println(tdl); //System.out.println(); //TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed"); // tp.printTree(parse); String sentence = "which movies were directed by Christopher Nolan"; Tree t2 = lp.parse(sentence); System.out.println(t2.firstChild().toString()); gs = gsf.newGrammaticalStructure(t2); tdl = gs.typedDependenciesCCprocessed(); System.out.println(tdl); System.out.println(tdl.get(0).dep().nodeString()); }
From source file:com.search.MySearchHandler.java
License:Apache License
@Override public void init(PluginInfo info) { init(info.initArgs);/*www . j a va 2 s. co m*/ fieldSet = new HashSet<String>(); fieldSet.addAll(((NamedList) info.initArgs.get("searchFields")).getAll("searchField")); for (PluginInfo child : info.children) { if ("shardHandlerFactory".equals(child.type)) { this.shfInfo = child; break; } } exrelmap = readHashMapFromDisk((String) ((NamedList) info.initArgs.get("inputFiles")).get("exrelmap")); questypemap = readHashMapFromDisk( (String) ((NamedList) info.initArgs.get("inputFiles")).get("questypemap")); mappingmap = readHashMapFromDisk((String) ((NamedList) info.initArgs.get("inputFiles")).get("mappingmap")); // exrelmap = // readHashMapFromDisk("F:\\solr-4.6.0\\solr-4.6.0\\dist\\expandedRelations.properties"); // questypemap = // readHashMapFromDisk("F:\\solr-4.6.0\\solr-4.6.0\\dist\\questionTypeProbability.properties"); // mappingmap = // readHashMapFromDisk("F:\\solr-4.6.0\\solr-4.6.0\\dist\\Mappings150.properties"); try { classifier = CRFClassifier .getClassifier("edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz"); } catch (ClassCastException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); }
From source file:DependencyParser.ParseDependency.java
public ParseDependency() { lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); }
From source file:DependencyParser.Parser.java
public void CallParser(String text) // start of the main method { try {//from www. java 2s.co m TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); LexicalizedParser lp = LexicalizedParser .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); lp.setOptionFlags(new String[] { "-maxLength", "500", "-retainTmpSubcategories" }); TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), ""); List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize(); Tree tree = lp.apply(wordList); GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true); System.out.println(tdl); PrintWriter pw = new PrintWriter("H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\Text-Parsed.txt"); TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed"); tp.printTree(tree, pw); pw.close(); Main.writeImage(tree, tdl, "H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\image.png", 3); assert (new File("image.png").exists()); } catch (FileNotFoundException f) { } catch (Exception ex) { Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, null, ex); } }