Example usage for the edu.stanford.nlp.simple.Sentence constructor

List of usage examples for the edu.stanford.nlp.simple.Sentence constructor

Introduction

On this page you can find example usages of the constructor of edu.stanford.nlp.simple.Sentence.

Prototype

public Sentence(SentenceFragment sentence) 

Source Link

Document

Convert a sentence fragment (i.e., entailed sentence) into a simple sentence object.

Usage

From source file: algo2.Algo2.java

/**
 * Reads the text of every {@code <P>} element in {@code input.xml}, runs named-entity
 * recognition on each paragraph, and writes the distinct person, location, organization
 * and date phrases (built from token lemmas) to {@code outputFile.txt}.
 *
 * <p>A phrase is the run of consecutive tokens carrying the same entity label, capped at
 * 5 tokens (9 for organizations). A phrase that is contained in the previously built
 * phrase is not recorded, which suppresses the shorter tails of a multi-token entity.
 *
 * <p>Any exception is reported on standard output; the output file is closed either way.
 *
 * @param args unused
 */
public static void main(String[] args) {

    try {
        List<String> paragraphs = new ArrayList<>();
        Set<String> locations = new LinkedHashSet<>();
        Set<String> organizations = new LinkedHashSet<>();
        Set<String> dates = new LinkedHashSet<>();
        Set<String> persons = new LinkedHashSet<>();

        // NOTE(review): the parser uses default settings; if input.xml can come from an
        // untrusted source, DTDs/external entities should be disabled — confirm.
        File file = new File("input.xml");
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        org.w3c.dom.Document doc = dBuilder.parse(file);

        // Normalize the tree starting at the root element.
        doc.getDocumentElement().normalize();

        NodeList nList1 = doc.getElementsByTagName("P");

        int length = nList1.getLength();
        for (int j = 0; j < length; j++) {
            Node node = nList1.item(j);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                Element eElement = (Element) node;
                paragraphs.add(eElement.getTextContent());
            }
        }

        // try-with-resources: the file is closed even when tagging or writing fails.
        try (PrintWriter writer = new PrintWriter("outputFile.txt")) {
            // Last phrase built, shared across labels and paragraphs.
            String prevWord = "";

            for (String str : paragraphs) {
                Sentence sentence = new Sentence(str);
                List<String> nerTags = sentence.nerTags();

                for (int i = 0; i < nerTags.size(); i++) {
                    String tag = nerTags.get(i);
                    String label;
                    int maxTokens;
                    Set<String> target;

                    if (tag.equalsIgnoreCase("location")) {
                        label = "location";
                        maxTokens = 5;
                        target = locations;
                    } else if (tag.equalsIgnoreCase("organization")) {
                        label = "organization";
                        maxTokens = 9;
                        target = organizations;
                    } else if (tag.equalsIgnoreCase("date")) {
                        label = "date";
                        maxTokens = 5;
                        target = dates;
                    } else if (tag.equalsIgnoreCase("person")) {
                        label = "person";
                        maxTokens = 5;
                        target = persons;
                    } else {
                        continue;
                    }

                    String word = joinEntityLemmas(sentence, nerTags, i, label, maxTokens);

                    // Skip phrases already covered by the previous phrase, but still
                    // advance prevWord so the next tail is compared against this one.
                    if (!prevWord.contains(word)) {
                        target.add(word);
                    }
                    prevWord = word;
                }
            }

            writer.println("\nPersons:\n---------------");
            printAll(writer, persons);
            writer.println();

            writer.println("Locations:\n--------------");
            printAll(writer, locations);
            writer.println();

            writer.println("\nOrganizations:\n----------------");
            printAll(writer, organizations);
            writer.println();

            writer.println("\nDates:\n---------------");
            printAll(writer, dates);
        }

    } catch (Exception ex) {
        System.out.println("Exception: " + ex);
    }

}

// Joins, space-separated, the lemmas of the run of consecutive tokens tagged `label`
// that starts at `start`; stops at the end of the sentence or after maxTokens tokens.
private static String joinEntityLemmas(Sentence sentence, List<String> nerTags, int start,
        String label, int maxTokens) {
    String phrase = sentence.lemma(start);
    // Bound the look-ahead by the sentence length so a trailing entity cannot overrun.
    int limit = Math.min(nerTags.size(), start + maxTokens);
    for (int k = start + 1; k < limit && nerTags.get(k).equalsIgnoreCase(label); k++) {
        phrase = phrase + " " + sentence.lemma(k);
    }
    return phrase;
}

// Writes each value on its own line, in the set's iteration order.
private static void printAll(PrintWriter writer, Set<String> values) {
    for (String value : values) {
        writer.println(value);
    }
}

From source file: org.lambda3.text.simplification.discourse.utils.words.WordsUtils.java

License:Open Source License

/**
 * Builds a {@code Sentence} from the given word's value and returns a new {@code Word}
 * wrapping the lemma at token index 0 of that sentence.
 *
 * @param word the word whose {@code value()} is lemmatized
 * @return a new {@code Word} constructed from the lemma of the first token
 */
public static Word lemmatize(Word word) {
    String firstLemma = new Sentence(word.value()).lemma(0);
    return new Word(firstLemma);
}