List of usage examples for edu.stanford.nlp.pipeline StanfordCoreNLP annotate
@Override public void annotate(Annotation annotation)
From source file:Treeparse.java
public static void main(String[] args) { // TODO code application logic here Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma,parse"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); System.out.println("Enter the text:"); Scanner sc = new Scanner(System.in); text = sc.nextLine();/*from ww w .j av a2 s . c om*/ //while(text!="exit") //{ Annotation document = new Annotation(text); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { token_length = sentence.get(TokensAnnotation.class).size(); arr1 = new String[POSTagger.token_length]; arr2 = new String[POSTagger.token_length]; int i = 0, j = 0; // System.out.println("Size"+token_length); for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String pos = token.get(PartOfSpeechAnnotation.class); // String ner = token.get(NamedEntityTagAnnotation.class); } Tree tree = sentence.get(TreeAnnotation.class); // System.out.println(tree); List<Tree> x = GetNounPhrases(tree); System.out.println(x); // Print words and Pos Tags /*for (Tree leaf : leaves) { Tree parent = leaf.parent(tree); System.out.print(leaf.label().value() + "-" + parent.label().value() + " "); }*/ } //System.out.println("Enter the text:"); //text=sc.nextLine(); }
From source file:rev.java
/** * Processes requests for both HTTP <code>GET</code> and <code>POST</code> * methods.//from www . ja v a2 s . c o m * * @param request servlet request * @param response servlet response * @throws ServletException if a servlet-specific error occurs * @throws IOException if an I/O error occurs */ protected void processRequest(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException, SQLException, ClassNotFoundException { response.setContentType("text/html;charset=UTF-8"); try (PrintWriter out = response.getWriter()) { String a = request.getParameter("userMsg"); /* TODO output your page here. You may use following sample code. */ out.println("<!DOCTYPE HTML>\n" + "<head>\n" + "<link href=\"css/style.css\" rel=\"stylesheet\" type=\"text/css\" media=\"all\"/>\n" + "<link href=\"css/slider.css\" rel=\"stylesheet\" type=\"text/css\" media=\"all\"/>\n" + "<script type=\"text/javascript\" src=\"js/jquery-1.9.0.min.js\"></script>\n" + "<script type=\"text/javascript\" src=\"js/move-top.js\"></script>\n" + "<script type=\"text/javascript\" src=\"js/easing.js\"></script>\n" + "<script type=\"text/javascript\" src=\"js/jquery.nivo.slider.js\"></script>\n" + "<script type=\"text/javascript\">\n" + " $(window).load(function() {\n" + " $('#slider').nivoSlider();\n" + " });\n" + " <%! 
String n;\n" + " %>\n" + " <% \n" + " \n" + " n=(String)session.getAttribute(\"uname\"); \n" + " %>\n" + " </script>\n" + "</head>\n" + "<body>\n" + " <div class=\"header\">\n" + " <div class=\"headertop_desc\">\n" + " <div class=\"wrap\">\n" + " <div class=\"nav_list\">\n" + " \n" + " </div>\n" + " <div class=\"account_desc\">\n" + " <ul>\n" + " <li><a href=\"available.jsp\">Available movies</a></li>\n" + " <li><a href=\"takereview.jsp\">Review Movies</a></li>\n" + " <li><a href=\"rated.jsp\">Movies Rated</a></li>\n" + " <li><a href=\"abc.jsp\">Recommend Me</a></li>\n" + " \n" + " <li><a href=\"contact.html\">Contact</a></li>\n" + " <li><a href=\"logout\">Logout</a></li>\n" + " </ul>\n" + " </div>\n" + " <div class=\"clear\"></div>\n" + " </div>\n" + " </div>\n" + " <div class=\"wrap\">\n" + " <div class=\"header_top\">\n" + " <div class=\"logo\">\n" + " <a href=\"index.html\"><img src=\"images/logo1.jpg\" alt=\"\" /></a>\n" + " </div>\n" + " <div class=\"header_top_right\">\n" + " <div class=\"search_box\">\n" + " \n" + " </div>\n" + " <div class=\"clear\"></div>\n" + " </div>\n" + " \n" + " <div class=\"clear\"></div>\n" + " </div>\n" + " \n" + " \n" + "\n" + "\n" + ""); String line = "this book is too good to sleep"; Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, parse, sentiment, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation annotation = new Annotation(a); pipeline.annotate(annotation); annotation.toShorterString(); List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); if (sentences != null && !sentences.isEmpty()) { for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class); int sentiment = RNNCoreAnnotations.getPredictedClass(tree); String sentimentName = sentence.get(SentimentCoreAnnotations.SentimentClass.class); //Class.forName("com.mysql.jdbc.Driver"); 
/*String connectionURL = "jdbc:mysql://localhost:3306/review"; Connection conn; Statement stmt; ResultSet rs; conn = DriverManager.getConnection (connectionURL,"root",""); stmt = conn.createStatement(); // rs = stmt.executeQuery(""); out.println(); */ out.println("The sentence is:"); sentence.get(CoreAnnotations.TextAnnotation.class); //out.println("Sentiment of \n> \""++"\"\nis: " + sentiment+" (i.e., "+sentimentName+")"); out.println(sentimentName + " " + sentiment); if (sentimentName.equalsIgnoreCase("Negative")) { final String negative = "negative"; final String positive = "positive"; final String nuetral = "nuetral"; final String verypositive = "very positive"; final String verynegative = " very negative"; final DefaultCategoryDataset dataset = new DefaultCategoryDataset(); //out.println("NOT"); dataset.addValue(0, positive, positive); dataset.addValue(sentiment, negative, negative); dataset.addValue(0, nuetral, nuetral); dataset.addValue(0, verynegative, verynegative); dataset.addValue(0, verypositive, verypositive); JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments", dataset, PlotOrientation.VERTICAL, true, true, false); int width = 640; /* Width of the image */ int height = 480; /* Height of the image */ File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg"); ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height); out.println("<img src=\"images/BarChart.jpeg\">"); } else if (sentimentName.equalsIgnoreCase("Positive")) { final String negative = "negative"; final String positive = "positive"; final String nuetral = "nuetral"; final String verypositive = "very positive"; final String verynegative = " very negative"; final DefaultCategoryDataset dataset = new DefaultCategoryDataset(); // out.println("Good"); dataset.addValue(sentiment, positive, positive); dataset.addValue(0, negative, negative); dataset.addValue(0, nuetral, nuetral); dataset.addValue(0, verynegative, verynegative); 
dataset.addValue(0, verypositive, verypositive); JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments", dataset, PlotOrientation.VERTICAL, true, true, false); int width = 640; /* Width of the image */ int height = 480; /* Height of the image */ File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg"); ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height); out.println("<img src=\"images/BarChart1.jpeg\">"); } else if (sentimentName.equalsIgnoreCase("Neutral")) { final String negative = "negative"; final String positive = "positive"; final String nuetral = "nuetral"; final String verypositive = "very positive"; final String verynegative = " very negative"; final DefaultCategoryDataset dataset = new DefaultCategoryDataset(); //out.println("Good"); dataset.addValue(0, positive, positive); dataset.addValue(0, negative, negative); dataset.addValue(sentiment, nuetral, nuetral); dataset.addValue(0, verynegative, verynegative); dataset.addValue(0, verypositive, verypositive); JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments", dataset, PlotOrientation.VERTICAL, true, true, false); int width = 640; /* Width of the image */ int height = 480; /* Height of the image */ File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg"); ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height); out.println("<img src=\"images/BarChart2.jpeg\">"); } else if (sentimentName.equalsIgnoreCase("Very Positive")) { final String negative = "negative"; final String positive = "positive"; final String nuetral = "nuetral"; final String verypositive = "very positive"; final String verynegative = " very negative"; final DefaultCategoryDataset dataset = new DefaultCategoryDataset(); //out.println("Good"); dataset.addValue(0, positive, positive); dataset.addValue(0, negative, negative); dataset.addValue(0, nuetral, nuetral); dataset.addValue(0, verynegative, 
verynegative); dataset.addValue(sentiment, verypositive, verypositive); JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments", dataset, PlotOrientation.VERTICAL, true, true, false); int width = 640; /* Width of the image */ int height = 480; /* Height of the image */ File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg"); ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height); out.println("<img src=\"images/BarChart4.jpeg\">"); } else if (sentimentName.equalsIgnoreCase("Very Negative")) { final String negative = "negative"; final String positive = "positive"; final String nuetral = "nuetral"; final String verypositive = "very positive"; final String verynegative = " very negative"; final DefaultCategoryDataset dataset = new DefaultCategoryDataset(); //out.println("Good"); dataset.addValue(0, positive, positive); dataset.addValue(0, negative, negative); dataset.addValue(0, nuetral, nuetral); dataset.addValue(sentiment, verynegative, verynegative); dataset.addValue(0, verypositive, verypositive); JFreeChart barChart = ChartFactory.createBarChart("Movie Reviews", "Ratings", "Sentiments", dataset, PlotOrientation.VERTICAL, true, true, false); int width = 640; /* Width of the image */ int height = 480; /* Height of the image */ File BarChart = new File("/home/rishabh/NetBeansProjects/minor/web/images/k.jpeg"); ChartUtilities.saveChartAsJPEG(BarChart, barChart, width, height); out.println("<img src=\"images/BarChart3.jpeg\">"); } } } out.println("<div class=\"footer\">\n" + " <div class=\"wrap\">\n" + " <div class=\"section group\">\n" + " <div class=\"col span\">\n" + " <h4>Information</h4>\n" + " <ul>\n" + " <li><a href=\"#\">About Us</a></li>\n" + " \n" + " <li><a href=\"contact.html\">Contact Us</a></li>\n" + " </ul>\n" + " </div>\n" + " <div class=\"col span\">\n" + " <h4>Know us better</h4>\n" + " <ul>\n" + " <li><a href=\"#\">About Us</a></li>\n" + " \n" + " <li><a 
href=\"contact.html\">Site Map</a></li>\n" + " <li><a href=\"#\">Search Terms</a></li>\n" + " </ul>\n" + " </div>\n" + " \n" + " <div class=\"col span\">\n" + " <h4>Contact</h4>\n" + " <ul>\n" + " <li><span>9971825755</span></li>\n" + " <li><span>8130527232</span></li>\n" + " </ul>\n" + " <div class=\"social-icons\">\n" + " <h4>Follow Us</h4>\n" + " <ul>\n" + " <li><a href=\"#\" target=\"_blank\"><img src=\"images/facebook.png\" alt=\"\" /></a></li>\n" + " <li><a href=\"#\" target=\"_blank\"><img src=\"images/twitter.png\" alt=\"\" /></a></li>\n" + " <li><a href=\"#\" target=\"_blank\"><img src=\"images/skype.png\" alt=\"\" /> </a></li>\n" + " <li><a href=\"#\" target=\"_blank\"> <img src=\"images/linkedin.png\" alt=\"\" /></a></li>\n" + " <div class=\"clear\"></div>\n" + " </ul>\n" + " </div>\n" + " </div>\n" + " </div>\n" + " <div class=\"copy_right\">\n" + " <p>Company Name All rights Reseverd </p>\n" + " </div>\n" + " </div>\n" + " </div>\n" + " <script type=\"text/javascript\">\n" + " $(document).ready(function() {\n" + " $().UItoTop({ easingType: 'easeOutQuart' });\n" + "\n" + " });\n" + " </script>\n" + " <a href=\"#\" id=\"toTop\"><span id=\"toTopHover\"> </span></a>\n" + "</body>\n" + "</html>\n" + "\n" + ""); } }
From source file:analytics.weka.EnglishTextAnnotations.java
License:Apache License
public static void main(String[] args) { // creates a StanfordCoreNLP object, with POS tagging, lemmatization, // NER, parsing, and coreference resolution Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");//, parse, sentiment StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable String text = "jackie brown ( miramax - 1997 ) starring pam grier , samuel l . jackson , robert forster , bridget fonda , michael keaton , robert de niro , michael bowen , chris tucker screenplay by quentin tarantino , based on the novel rum punch by elmore leonard produced by lawrence bender directed by quentin tarantino running time : 155 minutes note : some may consider portions of the following text to be spoilers . be forewarned . ------------------------------------------------------------- during the three years since the release of the groundbreaking success pulp fiction , the cinematic output from its creator , quentin tarantino , has been surprisingly low . oh , he\'s been busy -- doing the talk show circuit , taking small roles in various films , overseeing the production of his screenplay from dusk till dawn , making cameo appearances on television shows , providing a vignette for the ill-fated anthology four rooms -- everything , it seems , except direct another feature-length film . it\'s been the long intermission between projects as well as the dizzying peak which pulp fiction reached which has made mr . tarantino\'s new feature film , jackie brown , one of the most anticipated films of the year , and his third feature film cements his reputation as the single most important new american filmmaker to emerge from the 1990s . things aren\'t going well for jackie brown ( pam grier ) . 
she\'s 44 years old , stuck at a dead-end job ( \" $16 , 000 a year , plus retirement benefits that aren\'t worth a damn \" ) as a flight attendant for the worst airline in north america -- and she\'s just been caught at the airport by atf agent ray nicolette ( portrayed with terrific childlike enthusiasm by michael keaton ) and police officer mark dargus ( michael bowen ) smuggling $50 000 from mexico for gun-runner ordell robbie ( samuel l . jackson ) , who has her bailed out by unassuming bail bondsman max cherry ( robert forster ) . the loquacious ordell , based out of a hermosa beach house where his horny , bong-hitting surfer girl melanie ( bridget fonda ) and agreeable crony louis gara ( robert de niro ) hang out , operates under the policy that the best rat is a dead rat , and he\'s soon out to silence jackie brown . meanwhile , the authorities\' target is ordell , and they want jackie to help them by arranging a sting to the tune of a half-million dollars . only through a series of clever twists , turns , and double-crosses will jackie be able to gain the upper hand on both of her nemeses . although jackie brown marks mr . tarantino\'s first produced screenplay adaptation ( based on the elmore leonard novel \" rum punch \" ) , there\'s no mistaking his distinctive fingerprints all over this film . while he\'s adhered closely to the source material in a narrative sense , the setting has been relocated to los angeles and the lead character\'s now black . in terms of ambiance , the film harkens back to the 1970s , from the wall-to-wall funk and soul music drowning the soundtrack to the nondescript look of the sets -- even the opening title credit sequence has the echo of vintage 1970s productions . the opening sequence featuring ms . grier wordlessly striding through the lax , funky music blaring away on the speakers , is emblematic of films of that era . the timeframe for the film is in fact 1995 , but the atmosphere of jackie brown is decidedly retro . 
of course , nothing in the film screams 1970s more than the casting of pam grier and robert forster as the two leads , and although the caper intrigue is fun to watch as the plot twists , backstabbing , and deceptions deliciously unfold , the strength of jackie brown is the quiet , understated relationship developed between jackie and max ; when they kiss , it\'s perhaps the most tender scene of the year . tenderness ? in a quentin tarantino film ? sure , there\'ve been moments of sweetness in his prior films -- the affectionate exchanges between the bruce willis and maria de madeiros characters in pulp fiction and the unflagging dedication shared by the characters of tim roth and amanda plummer , or even in reservoir dogs , where a deep , unspoken bond develops between the harvey keitel and tim roth characters -- but for the most part , mr . tarantino\'s films are typified by manic energy , unexpected outbursts of violence , and clever , often wordy , banter . these staples of his work are all present in jackie brown , but what\'s new here is a different facet of his storytelling -- a willingness to imbue the film with a poignant emotional undercurrent , and a patience to draw out several scenes with great deliberation . this effective demonstration of range prohibits the pigeonholing of mr . tarantino as simply a helmer of slick , hip crime dramas with fast-talking lowlifes , and heralds him as a bonafide multifaceted talent ; he\'s the real deal . this new aspect of mr . tarantino\'s storytelling is probably best embodied in a single character -- that of the world-weary , sensitive , and exceedingly-professional max cherry , whose unspoken attraction to jackie is touching . mr . forster\'s nuanced , understated performance is the best in the film ; he creates an amiable character of such poignancy that when he gazes at jackie , we smile along with him . 
much press has been given about the casting of blaxploitation-era icon pam grier in the lead , with the wags buzzing that mr . tarantino may do for her what his pulp fiction did to bolster john travolta\'s then-sagging career . as it turns out , ms . grier is solid in the film\'s title role , although nothing here forces her to test her range . i do have to take exception to the claim that this film marks her career resurrection , though -- she\'s been working steadily over the years , often in direct-to-video action flicks , but also in such recent theatrical releases as tim burton\'s mars attacks ! and larry cohen\'s original gangstas ( where she first teamed up with mr . forster . ) of course , it\'s true that her role here was a godsend -- a meaty a part as this is rarity for * any * actress , let alone one of her age and current status in the industry . while jackie brown may disappoint those looking for another pulp fiction clone , it marks tremendous growth of mr . tarantino as a director whose horizons are rapidly expanding , and whose characterizations have never been better . and while the film\'s narrative doesn\'t really warrant a running time of 155 minutes , it\'s filled with such sumptuous riches , ranging from the brashness of the vivid soundtrack to entertaining , inconsequential conversations between the characters , that there wasn\'t an unengaging moment . with an impressive trio of feature films under his belt , it\'ll be interesting to see what he tries next . \r\n"; // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); System.out.println(text);/*from w ww.j a v a 2 s. 
c om*/ System.out.println(document.get(TextAnnotation.class)); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and // has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); StringBuilder lemmas = new StringBuilder(); for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods //System.out.println("sentence: "+sentence.get(SentenceBeginAnnotation.class)); for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); //System.out.println("this is the text of the token: "+word); // String pos = token.get(LemmaAnnotation.class); // String ne = token.get(NamedEntityTagAnnotation.class); //System.out.println("this is the NER label of the token: "+ne); if ("O".equals(ne)) lemmas.append(pos).append(" "); } //System.out.println("sentence: "+sentence.get(SentenceEndAnnotation.class)); } System.out.println("this is the lemma tag of the token: " + lemmas); }
From source file:Anaphora_Resolution.AnaphoraDetection.java
/**
 * Runs deterministic coreference resolution on a fixed two-sentence sample and prints,
 * for every coreference chain, a "GROUP id" header followed by the span of each
 * mention in textual order.
 */
public void anophora() {
    Properties settings = new Properties();
    settings.put("annotators", "tokenize, ssplit,parse, lemma, ner, dcoref");
    StanfordCoreNLP nlp = new StanfordCoreNLP(settings);

    Annotation sample = new Annotation("Tom is a smart boy. He know a lot of thing.");
    nlp.annotate(sample);

    Map<Integer, CorefChain> chains = sample.get(CorefChainAnnotation.class);
    for (Map.Entry<Integer, CorefChain> chain : chains.entrySet()) {
        System.out.println("GROUP " + chain.getKey());
        for (CorefMention mention : chain.getValue().getMentionsInTextualOrder()) {
            System.out.println(mention.mentionSpan);
        }
    }
}
From source file:Anaphora_Resolution.Coref.java
/** * @param args the command line arguments *//* w w w .jav a 2 s .c o m*/ public static void main(String[] args) throws IOException, ClassNotFoundException { // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); props.put("pos.model", "H:\\nlp jar files\\stanford-postagger-2014-08-27\\stanford-postagger-2014-08-27\\models\\english-left3words-distsim.tagger"); //props.put("dcoref.big.gender.number", "edu/stanford/nlp/models/dcoref/gender.data.gz"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable String text = "Mary has a little lamb. She is very cute."; // Add your text here! // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); // this is the POS tag of the token String pos = token.get(PartOfSpeechAnnotation.class); // this is the NER label of the token String ne = token.get(NamedEntityTagAnnotation.class); } // this is the parse tree of the current sentence Tree tree = sentence.get(TreeAnnotation.class); System.out.println(tree); // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); } // This is the coreference link graph // Each chain 
stores a set of mentions that link to each other, // along with a method for getting the most representative mention // Both sentence and token offsets start at 1! Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class); System.out.println(graph); }
From source file:candidateGeneration.remove_missingContext.java
public static void main(String[] args) throws FileNotFoundException, IOException { InputStream is = new FileInputStream(sentence_detect_model); SentenceModel model = new SentenceModel(is); SentenceDetectorME sdetector = new SentenceDetectorME(model); Properties props = new Properties(); props.put("annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref"); StanfordCoreNLP pi = new StanfordCoreNLP(props); File writeFile = new File( "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\good_sentences_new.txt"); writeFile.createNewFile();//from w w w.j av a 2 s . com FileWriter writer = new FileWriter(writeFile); File writeFile2 = new File( "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\bad_sentences_new.txt"); writeFile2.createNewFile(); FileWriter writer2 = new FileWriter(writeFile2); String folderPath = "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\movieTest\\indivFiles\\"; File[] files = new File(folderPath).listFiles(); for (File file : files) { if (file.isFile()) { String name = file.getName(); name = name.replace("_", " "); name = name.replace("%28", "("); name = name.replace("%29", ")"); name = name.replace(".txt", ""); System.out.println("File: " + name); FileReader inputFile = new FileReader(folderPath + file.getName()); BufferedReader bufferReader = new BufferedReader(inputFile); String input; while ((input = bufferReader.readLine()) != null) { //System.out.println("Line: " + input); String sentences[] = sdetector.sentDetect(input); HashMap<Integer, Integer> toRemove = new HashMap<>(); Annotation doc = new Annotation(input); pi.annotate(doc); Map<Integer, CorefChain> graph = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class); for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) { CorefChain c = entry.getValue(); if (c.getMentionsInTextualOrder().size() <= 1) { continue; } //System.out.println("Mentions: " + c.toString()); String[] sentenceOccurence = c.toString().split(" "); int 
firstOccurence = -1; for (int i = 0; i < sentenceOccurence.length; i++) { if (firstOccurence == -1 && sentenceOccurence[i].equals("sentence")) { //System.out.println("first occurence : " + sentenceOccurence[i+1]); firstOccurence = Integer .parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", "")); continue; } if (sentenceOccurence[i].equals("sentence")) { //System.out.println("further occurence : "+sentenceOccurence[i+1]); if (Integer.parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", "")) != firstOccurence) { //System.out.println("Added " + sentenceOccurence[i+1].replace(",", "").replace("]", "") + " for removal"); toRemove.put(Integer.parseInt( sentenceOccurence[i + 1].replace(",", "").replace("]", "")), 1); } } } //System.out.println(c.toString()); } int cand_i = 1; for (String candidate_sentence : sentences) { if (toRemove.containsKey(cand_i)) { //System.out.println("REMOVING: " + candidate_sentence + "\n"); writer2.write(name + "\t" + candidate_sentence + "\n"); continue; } //System.out.println("TAKING: " + candidate_sentence + "\n"); writer.write(name + "\t" + candidate_sentence + "\n"); cand_i++; } //System.in.read(); } //System.out.println("Line done"); bufferReader.close(); //System.in.read(); } writer.flush(); writer2.flush(); } writer.close(); writer2.close(); }
From source file:cl.usach.ner.Ner.java
/**
 * Runs the CoreNLP pipeline over the {@code tweet} field (with every '#' removed) and
 * stores one entry per token in {@code nueva}: the token text, followed by its NER label
 * when one is present. Each sentence, token and label is also printed to standard output.
 *
 * <p>Side effects: replaces {@code nueva}, sets {@code oracion} to the cleaned text and
 * sets {@code hello} to "terminado" when finished.
 */
public void do_ner() {
    nueva = new ArrayList<>();

    // NOTE(review): parse and dcoref are configured but nothing they produce is read
    // here; kept so the pipeline behaves as before - confirm whether they are needed.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("annotators",
            "tokenize,ssplit,pos,lemma,ner,parse,dcoref", "tokenize.language", "es"));

    String text = tweet.replace("#", "");
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    oracion = text;

    for (CoreMap sentence : sentences) {
        System.out.println(sentence);
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            System.out.println("Palabra:");
            String word = token.get(TextAnnotation.class);
            System.out.println(word);

            ArrayList<String> temp = new ArrayList<>();
            temp.add(word);

            String ne = token.get(NamedEntityTagAnnotation.class);
            System.out.println(ne);
            if (ne != null) {
                temp.add(ne);
            }
            nueva.add(temp);
        }
    }

    hello = "terminado";
}
From source file:com.asimihsan.handytrowel.nlp.TextAnalyzer.java
License:Open Source License
public TextAnalyzer analyze() { // Stanford CoreNLP, avoid lemmatization as it's very slow to use Porter2 stemming // instead. (Porter -> Snowball (Porter2) -> Lancaster is order of stemming // aggressiveness. ///*from ww w . ja va 2 s. c o m*/ // other ideas // - remove top 10k most common english words Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "com.asimihsan.handytrowel.nlp.StopwordAnnotator"); List<String> stopWords = null; try { stopWords = WordReader.wordReaderWithResourcePath("/nlp/top1000words.txt").getWords(); } catch (IOException e) { e.printStackTrace(); return this; } String customStopWordList = Joiner.on(",").join(stopWords); props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(body); pipeline.annotate(document); List<CoreLabel> inputTokens = document.get(CoreAnnotations.TokensAnnotation.class); SnowballStemmer stemmer = new englishStemmer(); for (CoreLabel token : inputTokens) { Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); if (stopword.first()) continue; String word = token.word().toLowerCase(); //!!AI TODO this sucks, should make another annotator and make it optional etc. //also we're matching full stops! so we lose sentence information. if (punctuation.matcher(word).matches()) continue; //!AI TODO again this would be its own annotator and optional word = number.matcher(word).replaceAll("NUMBER"); stemmer.setCurrent(word); stemmer.stem(); word = stemmer.getCurrent(); tokens.add(word); } return this; }
From source file:com.dreamoval.opensource.driller.nlp.DrillerTokenizer.java
public void getwords(String text) { this.text = text; // Scanner sc = new Scanner(System.in); // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable //text = sc.nextLine();// Add your text here! // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); // this is the POS tag of the token String pos = token.get(PartOfSpeechAnnotation.class); // this is the NER label of the token String ne = token.get(NamedEntityTagAnnotation.class); // this is the parse tree of the current sentence Tree tree = sentence.get(TreeAnnotation.class); // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); System.out.println(word); }// w w w . j av a 2 s . c o m // This is the coreference link graph // Each chain stores a set of mentions that link to each other, // along with a method for getting the most representative mention // Both sentence and token offsets start at 1! Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class); } }
From source file:com.github.sharispe.slib.dsm.utils.StanfordLemmatizer.java
License:Open Source License
/** * Lemmatize a document and save the result in another file * @param inputFile the file to lemmatize * @param outputFile the result /*from w w w. j a v a2s. com*/ * @param path_to_pos_model the path to the POS model to consider * @throws IOException if an IO error occurs */ public static void lemmatize(String inputFile, String outputFile, String path_to_pos_model) throws IOException { // https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html String[] pennTags = { "NN", "NNS", "NNP", "VB" }; List<String> acceptedPennTag = Arrays.asList(pennTags); String textContent = readFile(inputFile, StandardCharsets.UTF_8); String textContentProcess = ""; // To remove the annoying log RedwoodConfiguration.empty().capture(System.err).apply(); Properties props = new Properties(); props.put("pos.model", path_to_pos_model); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // create an empty Annotation just with the given text Annotation document = new Annotation(textContent); // run all Annotators on this text pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); String sentenceLem; for (CoreMap sentence : sentences) { sentenceLem = ""; boolean f = true; for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { String lemma = token.get(CoreAnnotations.LemmaAnnotation.class); String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class); if (acceptedPennTag.contains(pos)) { if (!f) { sentenceLem += " "; } sentenceLem += lemma; f = false; } } textContentProcess += sentenceLem + "\n"; } // enable log RedwoodConfiguration.current().clear().apply(); FileUtils.writeStringToFile(new File(outputFile), textContentProcess, false); }