List of usage examples for the edu.stanford.nlp.simple.Sentence constructor
public Sentence(SentenceFragment sentence)
From source file:algo2.Algo2.java
public static void main(String[] args) { try {/*from w ww.j a v a 2s. co m*/ ArrayList<String> strings1 = new ArrayList<>(); Set<String> locations = new LinkedHashSet<>(); Set<String> organizations = new LinkedHashSet<>(); Set<String> dates = new LinkedHashSet<>(); Set<String> persons = new LinkedHashSet<>(); File file = new File("input.xml"); DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); org.w3c.dom.Document doc = dBuilder.parse(file); //get the root element doc.getDocumentElement().normalize(); //System.out.println(doc.getDocumentElement().getNodeName()); NodeList nList1 = doc.getElementsByTagName("P"); int length = nList1.getLength(); for (int j = 0; j < length; j++) { Node node = nList1.item(j); String str; if (node.getNodeType() == Node.ELEMENT_NODE) { Element eElement = (Element) node; str = eElement.getTextContent(); strings1.add(str); } } PrintWriter writer = new PrintWriter("outputFile.txt"); String prevWord = ""; for (String str : strings1) { List<String> nerTags; Sentence sentence = new Sentence(str); nerTags = sentence.nerTags(); for (int i = 0; i < nerTags.size(); i++) { if (nerTags.get(i).equalsIgnoreCase("location")) { String word = sentence.lemma(i); if (nerTags.get(i + 1).equalsIgnoreCase("location")) { word = word + " " + sentence.lemma(i + 1); if (nerTags.get(i + 2).equalsIgnoreCase("location")) { word = word + " " + sentence.lemma(i + 2); if (nerTags.get(i + 3).equalsIgnoreCase("location")) { word = word + " " + sentence.lemma(i + 3); if (nerTags.get(i + 4).equalsIgnoreCase("location")) { word = word + " " + sentence.lemma(i + 4); } } } } if (prevWord.contains(word)) { prevWord = word; continue; } locations.add(word); prevWord = word; } else if (nerTags.get(i).equalsIgnoreCase("organization")) { String word = sentence.lemma(i); if (nerTags.get(i + 1).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 1); if (nerTags.get(i + 
2).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 2); if (nerTags.get(i + 3).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 3); if (nerTags.get(i + 4).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 4); if (nerTags.get(i + 5).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 5); if (nerTags.get(i + 6).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 6); if (nerTags.get(i + 7).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 7); if (nerTags.get(i + 8).equalsIgnoreCase("organization")) { word = word + " " + sentence.lemma(i + 8); } } } } } } } } if (prevWord.contains(word)) { prevWord = word; continue; } organizations.add(word); prevWord = word; } else if (nerTags.get(i).equalsIgnoreCase("date")) { String word = sentence.lemma(i); if (nerTags.get(i + 1).equalsIgnoreCase("date")) { word = word + " " + sentence.lemma(i + 1); if (nerTags.get(i + 2).equalsIgnoreCase("date")) { word = word + " " + sentence.lemma(i + 2); if (nerTags.get(i + 3).equalsIgnoreCase("date")) { word = word + " " + sentence.lemma(i + 3); if (nerTags.get(i + 4).equalsIgnoreCase("date")) { word = word + " " + sentence.lemma(i + 4); } } } } if (prevWord.contains(word)) { prevWord = word; continue; } dates.add(word); prevWord = word; } else if (nerTags.get(i).equalsIgnoreCase("person")) { String word = sentence.lemma(i); if (nerTags.get(i + 1).equalsIgnoreCase("person")) { word = word + " " + sentence.lemma(i + 1); if (nerTags.get(i + 2).equalsIgnoreCase("person")) { word = word + " " + sentence.lemma(i + 2); if (nerTags.get(i + 3).equalsIgnoreCase("person")) { word = word + " " + sentence.lemma(i + 3); if (nerTags.get(i + 4).equalsIgnoreCase("person")) { word = word + " " + sentence.lemma(i + 4); } } } } if (prevWord.contains(word)) { prevWord = word; continue; } persons.add(word); prevWord = word; } } } 
writer.println("\nPersons:\n---------------"); for (String str : persons) { writer.println(str); } writer.println(); writer.println("Locations:\n--------------"); for (String str : locations) { writer.println(str); } writer.println(); writer.println("\nOrganizations:\n----------------"); for (String str : organizations) { writer.println(str); } writer.println(); writer.println("\nDates:\n---------------"); for (String str : dates) { writer.println(str); } writer.close(); } catch (Exception ex) { System.out.println("Exception: " + ex); } }
From source file:org.lambda3.text.simplification.discourse.utils.words.WordsUtils.java
License:Open Source License
/**
 * Returns a new {@link Word} holding the lemma of the given word.
 *
 * <p>The word's value is wrapped in a {@code Sentence} and the lemma of the token at
 * index 0 is used.
 *
 * @param word the word to lemmatize
 * @return a new {@code Word} built from the lemma of the first token
 */
public static Word lemmatize(Word word) {
    return new Word(new Sentence(word.value()).lemma(0));
}