Example usage for edu.stanford.nlp.pipeline StanfordCoreNLP annotate

Introduction

In this page you can find the example usage for edu.stanford.nlp.pipeline StanfordCoreNLP annotate.

Prototype

@Override
public void annotate(Annotation annotation)

Source Link

Usage

From source file:Treeparse.java

public static void main(String[] args) {
    // TODO code application logic here

    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    System.out.println("Enter the text:");
    Scanner sc = new Scanner(System.in);
    text = sc.nextLine();/*from ww  w  .j av  a2 s  .  c  om*/

    //while(text!="exit")
    //{
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {

        token_length = sentence.get(TokensAnnotation.class).size();
        arr1 = new String[POSTagger.token_length];
        arr2 = new String[POSTagger.token_length];
        int i = 0, j = 0;

        // System.out.println("Size"+token_length);
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {

            String word = token.get(TextAnnotation.class);

            String pos = token.get(PartOfSpeechAnnotation.class);
            //    String ner = token.get(NamedEntityTagAnnotation.class);

        }
        Tree tree = sentence.get(TreeAnnotation.class);
        // System.out.println(tree);
        List<Tree> x = GetNounPhrases(tree);
        System.out.println(x);

        // Print words and Pos Tags
        /*for (Tree leaf : leaves) { 
            Tree parent = leaf.parent(tree);
            System.out.print(leaf.label().value() + "-" + parent.label().value() + " ");
        }*/
    }
    //System.out.println("Enter the text:");
    //text=sc.nextLine();  

}

From source file:rev.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods.//from www . ja v  a2 s . c  o  m
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException, SQLException, ClassNotFoundException {
    response.setContentType("text/html;charset=UTF-8");
    try (PrintWriter out = response.getWriter()) {
        String a = request.getParameter("userMsg");

        /* TODO output your page here. You may use following sample code. */
        out.println("<!DOCTYPE HTML>\n" + "<head>\n"
                + "<link href=\"css/style.css\" rel=\"stylesheet\" type=\"text/css\" media=\"all\"/>\n"
                + "<link href=\"css/slider.css\" rel=\"stylesheet\" type=\"text/css\" media=\"all\"/>\n"
                + "<script type=\"text/javascript\" src=\"js/jquery-1.9.0.min.js\"></script>\n"
                + "<script type=\"text/javascript\" src=\"js/move-top.js\"></script>\n"
                + "<script type=\"text/javascript\" src=\"js/easing.js\"></script>\n"
                + "<script type=\"text/javascript\" src=\"js/jquery.nivo.slider.js\"></script>\n"
                + "<script type=\"text/javascript\">\n" + "    $(window).load(function() {\n"
                + "        $('#slider').nivoSlider();\n" + "    });\n" + "    <%! String n;\n" + "    %>\n"
                + "    <%  \n" + "          \n" + "        n=(String)session.getAttribute(\"uname\"); \n"
                + "        %>\n" + "    </script>\n" + "</head>\n" + "<body>\n" + "   <div class=\"header\">\n"
                + "       <div class=\"headertop_desc\">\n" + "         <div class=\"wrap\">\n"
                + "            <div class=\"nav_list\">\n" + "               \n" + "            </div>\n"
                + "            <div class=\"account_desc\">\n" + "                  <ul>\n"
                + "                     <li><a href=\"available.jsp\">Available movies</a></li>\n"
                + "                     <li><a href=\"takereview.jsp\">Review Movies</a></li>\n"
                + "                     <li><a href=\"rated.jsp\">Movies Rated</a></li>\n"
                + "                                                        <li><a href=\"abc.jsp\">Recommend Me</a></li>\n"
                + "                                                        \n"
                + "                                                        <li><a href=\"contact.html\">Contact</a></li>\n"
                + "                                                        <li><a href=\"logout\">Logout</a></li>\n"
                + "                                                </ul>\n" + "               </div>\n"
                + "            <div class=\"clear\"></div>\n" + "         </div>\n" + "        </div>\n"
                + "             <div class=\"wrap\">\n" + "            <div class=\"header_top\">\n"
                + "               <div class=\"logo\">\n"
                + "                  <a href=\"index.html\"><img src=\"images/logo1.jpg\" alt=\"\" /></a>\n"
                + "               </div>\n" + "               <div class=\"header_top_right\">\n"
                + "                <div class=\"search_box\">\n" + "                          \n"
                + "                       </div>\n" + "                    <div class=\"clear\"></div>\n"
                + "               </div>\n" + "                   \n"
                + "          <div class=\"clear\"></div>\n" + "        </div>\n" + "            \n" + "   \n"
                + "\n" + "\n" + "");

        String line = "this book is too good to sleep";
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, parse, sentiment, lemma");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation annotation = new Annotation(a);

        pipeline.annotate(annotation);

        annotation.toShorterString();

        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);

        if (sentences != null && !sentences.isEmpty()) {
            for (int i = 0; i < sentences.size(); i++) {
                CoreMap sentence = sentences.get(i);
                Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
                int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
                String sentimentName = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
                //Class.forName("com.mysql.jdbc.Driver");

                /*String connectionURL = "jdbc:mysql://localhost:3306/review";
                       Connection conn;
                        Statement stmt;
                        ResultSet rs;
                      conn = DriverManager.getConnection (connectionURL,"root","");
                       stmt = conn.createStatement();
                      // rs = stmt.executeQuery("");
                      out.println();
                         
                        
                */

                out.println("The sentence is:");
                sentence.get(CoreAnnotations.TextAnnotation.class);

                //out.println("Sentiment of \n> \""++"\"\nis: " + sentiment+" (i.e., "+sentimentName+")");
                out.println(sentimentName + " " + sentiment);
                if (sentimentName.equalsIgnoreCase("Negative")) {
                    final String negative = "negative";
                    final String positive = "positive";

                    final String nuetral = "nuetral";
                    final String verypositive = "very positive";
                    final String verynegative = " very negative";
                    final DefaultCategoryDataset dataset = new DefaultCategoryDataset();

                    //out.println("NOT");
                    dataset.addValue(0, positive, positive);
                    dataset.addValue(sentiment, negative, negative);
                    dataset.addValue(0, nuetral, nuetral);
                    dataset.addValue(0, verynegative, verynegative);
                    dataset.addValue(0, verypositive, verypositive);
                    JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments",
                            dataset, PlotOrientation.VERTICAL, true, true, false);

                    int width = 640; /* Width of the image */
                    int height = 480; /* Height of the image */
                    File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg");
                    ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height);
                    out.println("<img src=\"images/BarChart.jpeg\">");

                } else if (sentimentName.equalsIgnoreCase("Positive")) {
                    final String negative = "negative";
                    final String positive = "positive";

                    final String nuetral = "nuetral";
                    final String verypositive = "very positive";
                    final String verynegative = " very negative";
                    final DefaultCategoryDataset dataset = new DefaultCategoryDataset();

                    // out.println("Good");
                    dataset.addValue(sentiment, positive, positive);
                    dataset.addValue(0, negative, negative);
                    dataset.addValue(0, nuetral, nuetral);
                    dataset.addValue(0, verynegative, verynegative);
                    dataset.addValue(0, verypositive, verypositive);
                    JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments",
                            dataset, PlotOrientation.VERTICAL, true, true, false);

                    int width = 640; /* Width of the image */
                    int height = 480; /* Height of the image */
                    File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg");
                    ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height);
                    out.println("<img src=\"images/BarChart1.jpeg\">");

                } else if (sentimentName.equalsIgnoreCase("Neutral")) {
                    final String negative = "negative";
                    final String positive = "positive";

                    final String nuetral = "nuetral";
                    final String verypositive = "very positive";
                    final String verynegative = " very negative";
                    final DefaultCategoryDataset dataset = new DefaultCategoryDataset();

                    //out.println("Good");
                    dataset.addValue(0, positive, positive);
                    dataset.addValue(0, negative, negative);
                    dataset.addValue(sentiment, nuetral, nuetral);
                    dataset.addValue(0, verynegative, verynegative);
                    dataset.addValue(0, verypositive, verypositive);
                    JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments",
                            dataset, PlotOrientation.VERTICAL, true, true, false);

                    int width = 640; /* Width of the image */
                    int height = 480; /* Height of the image */
                    File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg");
                    ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height);

                    out.println("<img src=\"images/BarChart2.jpeg\">");
                } else if (sentimentName.equalsIgnoreCase("Very Positive")) {
                    final String negative = "negative";
                    final String positive = "positive";

                    final String nuetral = "nuetral";
                    final String verypositive = "very positive";
                    final String verynegative = " very negative";
                    final DefaultCategoryDataset dataset = new DefaultCategoryDataset();

                    //out.println("Good");
                    dataset.addValue(0, positive, positive);
                    dataset.addValue(0, negative, negative);
                    dataset.addValue(0, nuetral, nuetral);
                    dataset.addValue(0, verynegative, verynegative);
                    dataset.addValue(sentiment, verypositive, verypositive);
                    JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments",
                            dataset, PlotOrientation.VERTICAL, true, true, false);

                    int width = 640; /* Width of the image */
                    int height = 480; /* Height of the image */
                    File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg");
                    ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height);
                    out.println("<img src=\"images/BarChart4.jpeg\">");

                } else if (sentimentName.equalsIgnoreCase("Very Negative")) {
                    final String negative = "negative";
                    final String positive = "positive";

                    final String nuetral = "nuetral";
                    final String verypositive = "very positive";
                    final String verynegative = " very negative";
                    final DefaultCategoryDataset dataset = new DefaultCategoryDataset();

                    //out.println("Good");
                    dataset.addValue(0, positive, positive);
                    dataset.addValue(0, negative, negative);
                    dataset.addValue(0, nuetral, nuetral);
                    dataset.addValue(sentiment, verynegative, verynegative);
                    dataset.addValue(0, verypositive, verypositive);
                    JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments",
                            dataset, PlotOrientation.VERTICAL, true, true, false);

                    int width = 640; /* Width of the image */
                    int height = 480; /* Height of the image */
                    File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg");
                    ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height);

                    out.println("<img src=\"images/BarChart3.jpeg\">");
                }

            }
        }

        out.println("<div class=\"footer\">\n" + "        <div class=\"wrap\">\n"
                + "        <div class=\"section group\">\n" + "            <div class=\"col span\">\n"
                + "                  <h4>Information</h4>\n" + "                  <ul>\n"
                + "                  <li><a href=\"#\">About Us</a></li>\n" + "                  \n"
                + "                  <li><a href=\"contact.html\">Contact Us</a></li>\n"
                + "                  </ul>\n" + "               </div>\n"
                + "            <div class=\"col span\">\n" + "               <h4>Know us better</h4>\n"
                + "                  <ul>\n" + "                  <li><a href=\"#\">About Us</a></li>\n"
                + "            \n" + "                  <li><a href=\"contact.html\">Site Map</a></li>\n"
                + "                  <li><a href=\"#\">Search Terms</a></li>\n" + "                  </ul>\n"
                + "            </div>\n" + "            \n" + "            <div class=\"col span\">\n"
                + "               <h4>Contact</h4>\n" + "                  <ul>\n"
                + "                     <li><span>9971825755</span></li>\n"
                + "                     <li><span>8130527232</span></li>\n" + "                  </ul>\n"
                + "                  <div class=\"social-icons\">\n"
                + "                     <h4>Follow Us</h4>\n" + "                          <ul>\n"
                + "                           <li><a href=\"#\" target=\"_blank\"><img src=\"images/facebook.png\" alt=\"\" /></a></li>\n"
                + "                           <li><a href=\"#\" target=\"_blank\"><img src=\"images/twitter.png\" alt=\"\" /></a></li>\n"
                + "                           <li><a href=\"#\" target=\"_blank\"><img src=\"images/skype.png\" alt=\"\" /> </a></li>\n"
                + "                           <li><a href=\"#\" target=\"_blank\"> <img src=\"images/linkedin.png\" alt=\"\" /></a></li>\n"
                + "                           <div class=\"clear\"></div>\n" + "                       </ul>\n"
                + "                      </div>\n" + "            </div>\n" + "         </div>\n"
                + "          <div class=\"copy_right\">\n"
                + "            <p>Company Name  All rights Reseverd </p>\n" + "         </div>\n"
                + "        </div>\n" + "    </div>\n" + "    <script type=\"text/javascript\">\n"
                + "      $(document).ready(function() {\n"
                + "         $().UItoTop({ easingType: 'easeOutQuart' });\n" + "\n" + "      });\n"
                + "   </script>\n" + "    <a href=\"#\" id=\"toTop\"><span id=\"toTopHover\"> </span></a>\n"
                + "</body>\n" + "</html>\n" + "\n" + "");

    }
}

From source file:analytics.weka.EnglishTextAnnotations.java

License:Apache License

public static void main(String[] args) {
    // creates a StanfordCoreNLP object, with POS tagging, lemmatization,
    // NER, parsing, and coreference resolution
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");//, parse, sentiment
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "jackie brown ( miramax - 1997 ) starring pam grier , samuel l . jackson , robert forster , bridget fonda , michael keaton , robert de niro , michael bowen , chris tucker screenplay by quentin tarantino , based on the novel rum punch by elmore leonard produced by lawrence bender directed by quentin tarantino running time : 155 minutes note : some may consider portions of the following text to be spoilers . be forewarned . ------------------------------------------------------------- during the three years since the release of the groundbreaking success pulp fiction , the cinematic output from its creator , quentin tarantino , has been surprisingly low . oh , he\'s been busy -- doing the talk show circuit , taking small roles in various films , overseeing the production of his screenplay from dusk till dawn , making cameo appearances on television shows , providing a vignette for the ill-fated anthology four rooms -- everything , it seems , except direct another feature-length film . it\'s been the long intermission between projects as well as the dizzying peak which pulp fiction reached which has made mr . tarantino\'s new feature film , jackie brown , one of the most anticipated films of the year , and his third feature film cements his reputation as the single most important new american filmmaker to emerge from the 1990s . things aren\'t going well for jackie brown ( pam grier ) . she\'s 44 years old , stuck at a dead-end job ( \" $16 , 000 a year , plus retirement benefits that aren\'t worth a damn \" ) as a flight attendant for the worst airline in north america -- and she\'s just been caught at the airport by atf agent ray nicolette ( portrayed with terrific childlike enthusiasm by michael keaton ) and police officer mark dargus ( michael bowen ) smuggling $50 000 from mexico for gun-runner ordell robbie ( samuel l . jackson ) , who has her bailed out by unassuming bail bondsman max cherry ( robert forster ) . the loquacious ordell , based out of a hermosa beach house where his horny , bong-hitting surfer girl melanie ( bridget fonda ) and agreeable crony louis gara ( robert de niro ) hang out , operates under the policy that the best rat is a dead rat , and he\'s soon out to silence jackie brown . meanwhile , the authorities\' target is ordell , and they want jackie to help them by arranging a sting to the tune of a half-million dollars . only through a series of clever twists , turns , and double-crosses will jackie be able to gain the upper hand on both of her nemeses . although jackie brown marks mr . tarantino\'s first produced screenplay adaptation ( based on the elmore leonard novel \" rum punch \" ) , there\'s no mistaking his distinctive fingerprints all over this film . while he\'s adhered closely to the source material in a narrative sense , the setting has been relocated to los angeles and the lead character\'s now black . in terms of ambiance , the film harkens back to the 1970s , from the wall-to-wall funk and soul music drowning the soundtrack to the nondescript look of the sets -- even the opening title credit sequence has the echo of vintage 1970s productions . the opening sequence featuring ms . grier wordlessly striding through the lax , funky music blaring away on the speakers , is emblematic of films of that era . the timeframe for the film is in fact 1995 , but the atmosphere of jackie brown is decidedly retro . of course , nothing in the film screams 1970s more than the casting of pam grier and robert forster as the two leads , and although the caper intrigue is fun to watch as the plot twists , backstabbing , and deceptions deliciously unfold , the strength of jackie brown is the quiet , understated relationship developed between jackie and max ; when they kiss , it\'s perhaps the most tender scene of the year . tenderness ? in a quentin tarantino film ? sure , there\'ve been moments of sweetness in his prior films -- the affectionate exchanges between the bruce willis and maria de madeiros characters in pulp fiction and the unflagging dedication shared by the characters of tim roth and amanda plummer , or even in reservoir dogs , where a deep , unspoken bond develops between the harvey keitel and tim roth characters -- but for the most part , mr . tarantino\'s films are typified by manic energy , unexpected outbursts of violence , and clever , often wordy , banter . these staples of his work are all present in jackie brown , but what\'s new here is a different facet of his storytelling -- a willingness to imbue the film with a poignant emotional undercurrent , and a patience to draw out several scenes with great deliberation . this effective demonstration of range prohibits the pigeonholing of mr . tarantino as simply a helmer of slick , hip crime dramas with fast-talking lowlifes , and heralds him as a bonafide multifaceted talent ; he\'s the real deal . this new aspect of mr . tarantino\'s storytelling is probably best embodied in a single character -- that of the world-weary , sensitive , and exceedingly-professional max cherry , whose unspoken attraction to jackie is touching . mr . forster\'s nuanced , understated performance is the best in the film ; he creates an amiable character of such poignancy that when he gazes at jackie , we smile along with him . much press has been given about the casting of blaxploitation-era icon pam grier in the lead , with the wags buzzing that mr . tarantino may do for her what his pulp fiction did to bolster john travolta\'s then-sagging career . as it turns out , ms . grier is solid in the film\'s title role , although nothing here forces her to test her range . i do have to take exception to the claim that this film marks her career resurrection , though -- she\'s been working steadily over the years , often in direct-to-video action flicks , but also in such recent theatrical releases as tim burton\'s mars attacks ! and larry cohen\'s original gangstas ( where she first teamed up with mr . forster . ) of course , it\'s true that her role here was a godsend -- a meaty a part as this is rarity for * any * actress , let alone one of her age and current status in the industry . while jackie brown may disappoint those looking for another pulp fiction clone , it marks tremendous growth of mr . tarantino as a director whose horizons are rapidly expanding , and whose characterizations have never been better . and while the film\'s narrative doesn\'t really warrant a running time of 155 minutes , it\'s filled with such sumptuous riches , ranging from the brashness of the vivid soundtrack to entertaining , inconsequential conversations between the characters , that there wasn\'t an unengaging moment . with an impressive trio of feature films under his belt , it\'ll be interesting to see what he tries next . \r\n";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    System.out.println(text);/*from  w  ww.j  a v a 2  s. c  om*/
    System.out.println(document.get(TextAnnotation.class));

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and
    // has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    StringBuilder lemmas = new StringBuilder();
    for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        //System.out.println("sentence: "+sentence.get(SentenceBeginAnnotation.class));
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {

            String word = token.get(TextAnnotation.class);
            //System.out.println("this is the text of the token: "+word);
            // 
            String pos = token.get(LemmaAnnotation.class);

            // 
            String ne = token.get(NamedEntityTagAnnotation.class);
            //System.out.println("this is the NER label of the token: "+ne);
            if ("O".equals(ne))
                lemmas.append(pos).append(" ");
        }

        //System.out.println("sentence: "+sentence.get(SentenceEndAnnotation.class));
    }

    System.out.println("this is the lemma tag of the token: " + lemmas);
}

From source file:Anaphora_Resolution.AnaphoraDetection.java

public void anophora() {
    String text = "Tom is a smart boy. He know a lot of thing.";

    Annotation document = new Annotation(text);
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit,parse, lemma, ner, dcoref");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    for (Integer i : graph.keySet()) {
        System.out.println("GROUP " + i);
        CorefChain x = graph.get(i);/*from w w  w .  j  a  v a 2s . co m*/
        for (CorefMention m : x.getMentionsInTextualOrder()) {
            System.out.println(m.mentionSpan);
        }
    }

}

From source file:Anaphora_Resolution.Coref.java

/**
 * @param args the command line arguments
 *//*  w w w  .jav  a  2 s  .c  o  m*/
public static void main(String[] args) throws IOException, ClassNotFoundException {
    // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.put("pos.model",
            "H:\\nlp jar files\\stanford-postagger-2014-08-27\\stanford-postagger-2014-08-27\\models\\english-left3words-distsim.tagger");
    //props.put("dcoref.big.gender.number", "edu/stanford/nlp/models/dcoref/gender.data.gz");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "Mary has a little lamb. She is very cute."; // Add your text here!

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // this is the text of the token
            String word = token.get(TextAnnotation.class);
            // this is the POS tag of the token
            String pos = token.get(PartOfSpeechAnnotation.class);
            // this is the NER label of the token
            String ne = token.get(NamedEntityTagAnnotation.class);
        }

        // this is the parse tree of the current sentence
        Tree tree = sentence.get(TreeAnnotation.class);
        System.out.println(tree);

        // this is the Stanford dependency graph of the current sentence
        SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    }

    // This is the coreference link graph
    // Each chain stores a set of mentions that link to each other,
    // along with a method for getting the most representative mention
    // Both sentence and token offsets start at 1!
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    System.out.println(graph);
}

From source file:candidateGeneration.remove_missingContext.java

public static void main(String[] args) throws FileNotFoundException, IOException {
    InputStream is = new FileInputStream(sentence_detect_model);
    SentenceModel model = new SentenceModel(is);
    SentenceDetectorME sdetector = new SentenceDetectorME(model);

    Properties props = new Properties();
    props.put("annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref");
    StanfordCoreNLP pi = new StanfordCoreNLP(props);

    File writeFile = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\good_sentences_new.txt");
    writeFile.createNewFile();//from w w w.j av  a 2  s .  com
    FileWriter writer = new FileWriter(writeFile);

    File writeFile2 = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\bad_sentences_new.txt");
    writeFile2.createNewFile();
    FileWriter writer2 = new FileWriter(writeFile2);

    String folderPath = "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\movieTest\\indivFiles\\";
    File[] files = new File(folderPath).listFiles();
    for (File file : files) {
        if (file.isFile()) {
            String name = file.getName();
            name = name.replace("_", " ");
            name = name.replace("%28", "(");
            name = name.replace("%29", ")");
            name = name.replace(".txt", "");
            System.out.println("File: " + name);

            FileReader inputFile = new FileReader(folderPath + file.getName());
            BufferedReader bufferReader = new BufferedReader(inputFile);
            String input;

            while ((input = bufferReader.readLine()) != null) {
                //System.out.println("Line: " + input);
                String sentences[] = sdetector.sentDetect(input);
                HashMap<Integer, Integer> toRemove = new HashMap<>();
                Annotation doc = new Annotation(input);
                pi.annotate(doc);
                Map<Integer, CorefChain> graph = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);

                for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
                    CorefChain c = entry.getValue();

                    if (c.getMentionsInTextualOrder().size() <= 1) {
                        continue;
                    }

                    //System.out.println("Mentions: " + c.toString());
                    String[] sentenceOccurence = c.toString().split(" ");
                    int firstOccurence = -1;
                    for (int i = 0; i < sentenceOccurence.length; i++) {
                        if (firstOccurence == -1 && sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("first occurence : " + sentenceOccurence[i+1]);
                            firstOccurence = Integer
                                    .parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", ""));
                            continue;
                        }

                        if (sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("further occurence : "+sentenceOccurence[i+1]);
                            if (Integer.parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]",
                                    "")) != firstOccurence) {
                                //System.out.println("Added " + sentenceOccurence[i+1].replace(",", "").replace("]", "") + " for removal");
                                toRemove.put(Integer.parseInt(
                                        sentenceOccurence[i + 1].replace(",", "").replace("]", "")), 1);
                            }
                        }
                    }
                    //System.out.println(c.toString());
                }

                int cand_i = 1;
                for (String candidate_sentence : sentences) {
                    if (toRemove.containsKey(cand_i)) {
                        //System.out.println("REMOVING: " + candidate_sentence + "\n");
                        writer2.write(name + "\t" + candidate_sentence + "\n");
                        continue;
                    }
                    //System.out.println("TAKING: " + candidate_sentence + "\n");
                    writer.write(name + "\t" + candidate_sentence + "\n");
                    cand_i++;
                }
                //System.in.read();
            }
            //System.out.println("Line done");
            bufferReader.close();
            //System.in.read();
        }
        writer.flush();
        writer2.flush();
    }
    writer.close();
    writer2.close();
}

From source file:cl.usach.ner.Ner.java

public void do_ner() {

    nueva = new ArrayList<>();
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators",
            "tokenize,ssplit,pos,lemma,ner,parse,dcoref", "tokenize.language", "es"));

    // read some text in the text variable
    String text = tweet;//  www.  ja v a 2 s  . com
    text = text.replace("#", "");
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    oracion = text;
    for (CoreMap sentence : sentences) {
        System.out.println(sentence);
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // this is the text of the token
            System.out.println("Palabra:");
            String word = token.get(TextAnnotation.class);
            System.out.println(word);
            ArrayList<String> temp = new ArrayList<>();
            temp.add(word);
            // this is the POS tag of the token
            String pos = token.get(PartOfSpeechAnnotation.class);
            // this is the NER label of the token
            String ne = token.get(NamedEntityTagAnnotation.class);
            System.out.println(ne);
            if (ne != null) {

                temp.add(ne);
            }

            nueva.add(temp);
        }

        // this is the parse tree of the current sentence
        Tree tree = sentence.get(TreeAnnotation.class);

        // this is the Stanford dependency graph of the current sentence
        SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    }

    // This is the coreference link graph
    // Each chain stores a set of mentions that link to each other,
    // along with a method for getting the most representative mention
    // Both sentence and token offsets start at 1!
    Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = document.get(CorefChainAnnotation.class);

    hello = "terminado";
}

From source file:com.asimihsan.handytrowel.nlp.TextAnalyzer.java

License:Open Source License

public TextAnalyzer analyze() {
    // Stanford CoreNLP, avoid lemmatization as it's very slow to use Porter2 stemming
    // instead. (Porter -> Snowball (Porter2) -> Lancaster is order of stemming
    // aggressiveness.
    ///*from   ww w . ja  va 2 s.  c  o m*/
    // other ideas
    // - remove top 10k most common english words
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, stopword");
    props.setProperty("customAnnotatorClass.stopword", "com.asimihsan.handytrowel.nlp.StopwordAnnotator");
    List<String> stopWords = null;
    try {
        stopWords = WordReader.wordReaderWithResourcePath("/nlp/top1000words.txt").getWords();
    } catch (IOException e) {
        e.printStackTrace();
        return this;
    }
    String customStopWordList = Joiner.on(",").join(stopWords);
    props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(body);
    pipeline.annotate(document);
    List<CoreLabel> inputTokens = document.get(CoreAnnotations.TokensAnnotation.class);
    SnowballStemmer stemmer = new englishStemmer();
    for (CoreLabel token : inputTokens) {
        Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);
        if (stopword.first())
            continue;
        String word = token.word().toLowerCase();

        //!!AI TODO this sucks, should make another annotator and make it optional etc.
        //also we're matching full stops! so we lose sentence information.
        if (punctuation.matcher(word).matches())
            continue;

        //!AI TODO again this would be its own annotator and optional
        word = number.matcher(word).replaceAll("NUMBER");

        stemmer.setCurrent(word);
        stemmer.stem();
        word = stemmer.getCurrent();
        tokens.add(word);
    }
    return this;
}

From source file:com.dreamoval.opensource.driller.nlp.DrillerTokenizer.java

public void getwords(String text) {
    this.text = text;
    // Scanner sc = new Scanner(System.in);
    // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    //text = sc.nextLine();// Add your text here!
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    // run all Annotators on this text
    pipeline.annotate(document);
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // this is the text of the token
            String word = token.get(TextAnnotation.class);
            // this is the POS tag of the token
            String pos = token.get(PartOfSpeechAnnotation.class);
            // this is the NER label of the token
            String ne = token.get(NamedEntityTagAnnotation.class);
            // this is the parse tree of the current sentence
            Tree tree = sentence.get(TreeAnnotation.class);

            // this is the Stanford dependency graph of the current sentence
            SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
            System.out.println(word);

        }// w w  w  .  j av  a 2  s .  c o  m
        // This is the coreference link graph
        // Each chain stores a set of mentions that link to each other,
        // along with a method for getting the most representative mention
        // Both sentence and token offsets start at 1!
        Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);

    }

}

From source file:com.github.sharispe.slib.dsm.utils.StanfordLemmatizer.java

License:Open Source License

/**
 * Lemmatize a document and save the result in another file
 * @param inputFile the file to lemmatize
 * @param outputFile the result /*from   w  w  w. j a  v a2s.  com*/
 * @param path_to_pos_model the path to the POS model to consider
 * @throws IOException if an IO error occurs
 */
public static void lemmatize(String inputFile, String outputFile, String path_to_pos_model) throws IOException {

    // https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    String[] pennTags = { "NN", "NNS", "NNP", "VB" };
    List<String> acceptedPennTag = Arrays.asList(pennTags);
    String textContent = readFile(inputFile, StandardCharsets.UTF_8);
    String textContentProcess = "";

    // To remove the annoying log
    RedwoodConfiguration.empty().capture(System.err).apply();

    Properties props = new Properties();
    props.put("pos.model", path_to_pos_model);
    props.put("annotators", "tokenize, ssplit, pos, lemma");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(textContent);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    String sentenceLem;

    for (CoreMap sentence : sentences) {
        sentenceLem = "";

        boolean f = true;
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {

            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);

            if (acceptedPennTag.contains(pos)) {
                if (!f) {
                    sentenceLem += " ";
                }
                sentenceLem += lemma;
                f = false;
            }
        }
        textContentProcess += sentenceLem + "\n";
    }
    // enable log
    RedwoodConfiguration.current().clear().apply();
    FileUtils.writeStringToFile(new File(outputFile), textContentProcess, false);
}