Example usage for edu.stanford.nlp.parser.lexparser LexicalizedParser loadModel

List of usage examples for edu.stanford.nlp.parser.lexparser LexicalizedParser loadModel

Introduction

On this page you can find example usages of edu.stanford.nlp.parser.lexparser.LexicalizedParser.loadModel.

Prototype

public static LexicalizedParser loadModel(ObjectInputStream ois) 

Source Link

Document

Reads one object from the given ObjectInputStream, which is assumed to be a LexicalizedParser.

Usage

From source file:BuildBinarizedDataset.java

/**
 * Turns a text file into trees for use in a RNTN classifier such as
 * the treebank used in the Sentiment project.
 * <br>/*from   w w  w.jav a 2s  . com*/
 * The expected input file is one sentence per line, with sentences
 * separated by blank lines. The first line has the main label of the sentence together with the full sentence.
 * Lines after the first sentence line but before
 * the blank line will be treated as labeled sub-phrases.  The
 * labels should start with the label and then contain a list of
 * tokens the label applies to. All phrases that do not have their own label will take on the main sentence label!
 *  For example:
 * <br>
 * <code>
 * 1 Today is not a good day.<br>
 * 3 good<br>
 * 3 good day <br>
 * 3 a good day <br>
 * <br>
 * (next block starts here) <br>
 * </code>
 * By default the englishPCFG parser is used.  This can be changed
 * with the <code>-parserModel</code> flag.  Specify an input file
 * with <code>-input</code>.
 * <br>
 * If a sentiment model is provided with -sentimentModel, that model
 * will be used to prelabel the sentences.  Any spans with given
 * labels will then be used to adjust those labels.
 */
public static void main(String[] arg) throws IOException {
    CollapseUnaryTransformer transformer = new CollapseUnaryTransformer();
    // FileWriter writer = new FileWriter("D:\\dataset\\train.txt", true);
    String parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    String args[] = { "-input", "D:\\parse.txt", "-sentimentModel",
            "edu/stanford/nlp/models/sentiment/sentiment.ser.gz" };
    String inputPath = "D:\\dataset\\good.txt";

    String sentimentModelPath = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
    SentimentModel sentimentModel = null;

    /* for (int argIndex = 0; argIndex < args.length; ) {
       if (args[argIndex].equalsIgnoreCase("-input")) {
         inputPath = args[argIndex + 1];
         argIndex += 2;
       } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
         parserModel = args[argIndex + 1];
         argIndex += 2;
       } else if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
         sentimentModelPath = args[argIndex + 1];
         argIndex += 2;
       } else {
         System.err.println("Unknown argument " + args[argIndex]);
         System.exit(2);
       }
     }*/

    if (inputPath == null) {
        throw new IllegalArgumentException("Must specify input file with -input");
    }

    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel);
    TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(),
            parser.treebankLanguagePack());

    if (sentimentModelPath != null) {
        sentimentModel = SentimentModel.loadSerialized(sentimentModelPath);
    }

    String text = IOUtils.slurpFileNoExceptions(inputPath);
    String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk

    for (String chunk : chunks) {
        if (chunk.trim().isEmpty()) {
            continue;
        }
        // The expected format is that line 0 will be the text of the
        // sentence, and each subsequence line, if any, will be a value
        // followed by the sequence of tokens that get that value.

        // Here we take the first line and tokenize it as one sentence.
        String[] lines = chunk.trim().split("\\n");
        String sentence = lines[0];
        StringReader sin = new StringReader(sentence);
        DocumentPreprocessor document = new DocumentPreprocessor(sin);
        document.setSentenceFinalPuncWords(new String[] { "\n" });
        List<HasWord> tokens = document.iterator().next();
        Integer mainLabel = new Integer(tokens.get(0).word());
        //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; ");
        tokens = tokens.subList(1, tokens.size());
        //System.err.println(tokens);

        Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap();
        for (int i = 1; i < lines.length; ++i) {
            extractLabels(spanToLabels, tokens, lines[i]);
        }

        // TODO: add an option which treats the spans as constraints when parsing

        Tree tree = parser.apply(tokens);
        Tree binarized = binarizer.transformTree(tree);
        Tree collapsedUnary = transformer.transformTree(binarized);

        // if there is a sentiment model for use in prelabeling, we
        // label here and then use the user given labels to adjust
        if (sentimentModel != null) {
            Trees.convertToCoreLabels(collapsedUnary);
            SentimentCostAndGradient scorer = new SentimentCostAndGradient(sentimentModel, null);
            scorer.forwardPropagateTree(collapsedUnary);
            setPredictedLabels(collapsedUnary);
        } else {
            setUnknownLabels(collapsedUnary, mainLabel);
            //collapsedUnary.label().setValue(mainLabel.toString());
            //System.out.println("Root"+collapsedUnary.getNodeNumber(1));
        }

        Trees.convertToCoreLabels(collapsedUnary);
        collapsedUnary.indexSpans();

        for (Map.Entry<Pair<Integer, Integer>, String> pairStringEntry : spanToLabels.entrySet()) {
            setSpanLabel(collapsedUnary, pairStringEntry.getKey(), pairStringEntry.getValue());
        }
        String x = collapsedUnary.toString();
        //x.replaceAll("\\s","");
        x = x.replace("(", "[");
        x = x.replace(")", "]");
        //writer.write(x);
        //writer.write("\r\n"); 
        System.out.println(x);
        //System.out.println();
    }
    //writer.close();
}

From source file:ConstituencyParse.java

License:Apache License

/**
 * Opens the output writers and loads the parser, binarizer and
 * unary-collapsing transformer used by this instance.
 *
 * @param tokPath    file to open for {@code tokWriter}; when {@code null}
 *                   no token writer is opened
 * @param parentPath file to open for {@code parentWriter}
 * @param tokenize   value stored in the {@code tokenize} field
 * @throws IOException if one of the output files cannot be opened
 */
public ConstituencyParse(String tokPath, String parentPath, boolean tokenize) throws IOException {
    this.tokenize = tokenize;

    // Output files: the token file is optional, the parent file is always opened.
    if (tokPath != null) {
        tokWriter = new BufferedWriter(new FileWriter(tokPath));
    }
    parentWriter = new BufferedWriter(new FileWriter(parentPath));

    // Parser plus the tree transformations that depend on its language settings.
    parser = LexicalizedParser.loadModel(PCFG_PATH);
    binarizer = TreeBinarizer.simpleTreeBinarizer(
            parser.getTLPParams().headFinder(),
            parser.treebankLanguagePack());
    transformer = new CollapseUnaryTransformer();

    // set up to produce dependency representations from constituency trees
    TreebankLanguagePack pennPack = new PennTreebankLanguagePack();
    gsf = pennPack.grammaticalStructureFactory();
}

From source file:Anaphora_Resolution.ParseAllXMLDocuments.java

/**
 * Parses a hard-coded sample text sentence by sentence, runs pronoun
 * resolution ({@code findPronouns} / {@code HobbsResolve}) on every parsed
 * sentence and finally prints the original text followed by the leaves of
 * all resulting trees.
 *
 * @param args ignored
 * @throws IOException                  declared for the disabled XML processing path
 * @throws SAXException                 declared for the disabled XML processing path
 * @throws ParserConfigurationException declared for the disabled XML processing path
 * @throws TransformerException         declared for the disabled XML processing path
 */
public static void main(String[] args)
        throws IOException, SAXException, ParserConfigurationException, TransformerException {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");

    // NOTE: an earlier variant iterated over the XML files in the "DataToPort" folder, took the
    // content of every <text> element as input, and wrote the penn-formatted result back into the
    // document. That path is disabled; a fixed sample text is processed instead.
    String wholetext = "How about other changes for example the ways of doing the work and \n" + "\n"
            + "Iwould say security has , there 's more pressure put on people now than there used to be because obviously , especially after Locherbie , they tightened up on security and there 's a lot more pressure now especially from the ETR and stuff like that \n"
            + "People do n't feel valued any more , they feel  I do n't know I think they feel that nobody cares about them really anyway \n";

    ArrayList<Tree> parseTrees = new ArrayList<Tree>();
    // Penn-style rendering of all resolved trees. Currently not consumed; the disabled XML
    // write-back used it as the replacement text content.
    StringBuilder pennOutput = new StringBuilder();
    TreePrint tp = new TreePrint("penn");

    DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(wholetext));
    @SuppressWarnings("rawtypes")
    ArrayList<List> sentences = preprocess(dp);
    for (List sentence : sentences) {
        // Parse the new sentence and append it; it is always the last tree in the list.
        parseTrees.add(lp.apply(sentence));
        int current = parseTrees.size() - 1;

        // Locate all pronouns of the sentence, then resolve them one by one. Each resolution
        // replaces the current tree, so later pronouns see the already modified tree list.
        ArrayList<Tree> pronouns = findPronouns(parseTrees.get(current));
        for (Tree pronounTree : pronouns) {
            parseTrees.set(current, HobbsResolve(pronounTree, parseTrees));
        }

        StringWriter strwr = new StringWriter();
        PrintWriter prwr = new PrintWriter(strwr);
        tp.printTree(parseTrees.get(current), prwr);
        prwr.flush();
        pennOutput.append(strwr.toString());
    }

    // Rebuild the plain text from the leaves of every tree, separated by single spaces.
    StringBuilder leafText = new StringBuilder();
    for (Tree sentence : parseTrees) {
        for (Tree leaf : Trees.leaves(sentence)) {
            leafText.append(leaf).append(" ");
        }
    }

    System.out.println(wholetext);

    System.out.println();
    System.out.println("......");
    System.out.println(leafText.toString());
    System.out.println("All done.");
}

From source file:com.daemon.sentiment.FeatureMatrix.java

License:Open Source License

/**
 * Load the Stanford Parser object - depending on language - necessary to do
 * POS analysis. The parser field is set to {@code null} if the language is
 * not supported (including a {@code null} language), and a message naming
 * the unsupported language is logged.
 */
private void loadParser() {
    final String language = this.sourceData.getLanguage();

    // Only English has a language model at the moment.
    if ("en".equals(language)) {
        lexicalizedParser = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    } else {
        // Reset explicitly so a parser loaded for a previous language is not reused.
        lexicalizedParser = null;
        _logger.log("POS TAGGING is not supported for " + language);
    }
}

From source file:com.epictodo.controller.nlp.NLPLoadEngine.java

License:Open Source License

/**
 * Calls {@code mute()}, then loads the CRF classifier, the lexicalized
 * parser and the CoreNLP pipeline (with an additional "sutime"
 * {@code TimeAnnotator}).
 *
 * @throws RuntimeException rethrown unchanged, after logging, when loading
 *         any of the models fails
 */
public NLPLoadEngine() {
    this.mute();

    final Properties pipelineProps = new Properties();
    pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");

    try {
        // Load order: classifier, parser, then the full pipeline.
        CLASSIFIER = CRFClassifier.getClassifierNoExceptions(CLASSIFIER_MODEL);
        LEXICAL_PARSER = LexicalizedParser.loadModel(ENGLISHPCFG_MODEL);

        _pipeline = new StanfordCoreNLP(pipelineProps, true);
        _pipeline.addAnnotator(new TimeAnnotator("sutime", pipelineProps));

        _logger.log(Level.INFO, "Successfully loaded models.");
    } catch (RuntimeException loadFailure) {
        _logger.log(Level.SEVERE, "Error loading models.");
        throw loadFailure;
    }
}

From source file:com.github.kutschkem.Qgen.QuestionRankerByParseProbability.java

License:Open Source License

/**
 * Load the parser from the given URL. If the URL ends with {@code .gz} the
 * data is gunzipped while reading.
 *
 * @param aUrl
 *            URL of the parser file.
 * @return the parser read from the stream
 * @throws IOException
 *             if the URL cannot be opened or the stream cannot be set up
 */
// copied from DKPro's StanfordParser
private LexicalizedParser getParserDataFromSerializedFile(URL aUrl) throws IOException {
    // Always refers to the outermost stream created so far, so that the single
    // close in the finally block releases every wrapper, whichever step fails.
    InputStream stream = null;
    try {
        stream = aUrl.openStream();

        if (aUrl.toString().endsWith(".gz")) {
            stream = new GZIPInputStream(stream);
        }
        // it's faster to do the buffering _outside_ the gzipping as here
        ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(stream));
        stream = in;

        return LexicalizedParser.loadModel(in);
    } finally {
        closeQuietly(stream);
    }
}

From source file:com.parse.Dependency.java

/**
 * Demo: parses a pre-tokenized sentence and a raw question string with the
 * English PCFG model and prints the parse plus the CC-processed typed
 * dependencies of each.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    LexicalizedParser parser = LexicalizedParser
            .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    String[] parserFlags = { "-maxLength", "80", "-retainTmpSubcategories" };
    parser.setOptionFlags(parserFlags);

    // First example: an already tokenized sentence.
    String[] words = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> labeledWords = Sentence.toCoreLabelList(words);
    Tree firstParse = parser.apply(labeledWords);
    firstParse.pennPrint();
    System.out.println();

    TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory structureFactory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = structureFactory.newGrammaticalStructure(firstParse);
    List<TypedDependency> dependencies = structure.typedDependenciesCCprocessed();
    System.out.println(dependencies);

    // Second example: a raw string handed to the parser directly.
    String question = "which movies were directed by Christopher Nolan";
    Tree questionParse = parser.parse(question);
    System.out.println(questionParse.firstChild().toString());

    structure = structureFactory.newGrammaticalStructure(questionParse);
    dependencies = structure.typedDependenciesCCprocessed();
    System.out.println(dependencies);
    // Dependent node of the first dependency.
    System.out.println(dependencies.get(0).dep().nodeString());
}

From source file:com.search.MySearchHandler.java

License:Apache License

/**
 * Initializes the handler from the plugin configuration: collects the
 * search fields, remembers the "shardHandlerFactory" child plugin, reads
 * the three lookup maps named under "inputFiles", and loads the NER
 * classifier and the English PCFG parser.
 *
 * @param info plugin configuration; its init args are expected to contain
 *             "searchFields" and "inputFiles" entries
 */
@Override
public void init(PluginInfo info) {
    init(info.initArgs);

    fieldSet = new HashSet<String>();
    NamedList searchFields = (NamedList) info.initArgs.get("searchFields");
    fieldSet.addAll(searchFields.getAll("searchField"));

    // Only the first shard handler factory child is used.
    for (PluginInfo child : info.children) {
        if ("shardHandlerFactory".equals(child.type)) {
            this.shfInfo = child;
            break;
        }
    }

    // Paths of the lookup maps come from the "inputFiles" section of the configuration.
    NamedList inputFiles = (NamedList) info.initArgs.get("inputFiles");
    exrelmap = readHashMapFromDisk((String) inputFiles.get("exrelmap"));
    questypemap = readHashMapFromDisk((String) inputFiles.get("questypemap"));
    mappingmap = readHashMapFromDisk((String) inputFiles.get("mappingmap"));

    // A classifier that fails to load is only reported; initialization continues.
    try {
        classifier = CRFClassifier
                .getClassifier("edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz");
    } catch (ClassCastException castProblem) {
        castProblem.printStackTrace();
    } catch (ClassNotFoundException missingClass) {
        missingClass.printStackTrace();
    } catch (IOException ioProblem) {
        ioProblem.printStackTrace();
    }

    lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
}

From source file:DependencyParser.ParseDependency.java

public ParseDependency() {
    lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
}

From source file:DependencyParser.Parser.java

/**
 * Tokenizes and parses the given text with the English PCFG model, prints
 * its CC-processed typed dependencies to standard output, writes the penn
 * tree and collapsed dependencies to a text file, and renders the tree to
 * an image via {@code Main.writeImage}.
 * <p>
 * Failures are logged, not propagated.
 *
 * @param text the raw text to parse
 */
public void CallParser(String text) {
    final String parsedTextPath = "H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\Text-Parsed.txt";
    final String imagePath = "H:\\Thesis Development\\Thesis\\NLP\\src\\nlp\\image.png";

    try {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        LexicalizedParser lp = LexicalizedParser
                .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        lp.setOptionFlags(new String[] { "-maxLength", "500", "-retainTmpSubcategories" });
        TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
        Tree tree = lp.apply(wordList);

        GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
        Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true);
        System.out.println(tdl);

        // Close the writer even when printing fails, so the file handle is not leaked.
        PrintWriter pw = new PrintWriter(parsedTextPath);
        try {
            TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(tree, pw);
        } finally {
            pw.close();
        }

        Main.writeImage(tree, tdl, imagePath, 3);
        // Check the file that was actually requested, not a same-named file in the working directory.
        assert new File(imagePath).exists() : "image was not written to " + imagePath;
    } catch (FileNotFoundException f) {
        // Previously swallowed silently; report it so a missing output directory is visible.
        Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, "Could not open output file", f);
    } catch (Exception ex) {
        Logger.getLogger(Parser.class.getName()).log(Level.SEVERE, null, ex);
    }
}