Example usage for edu.stanford.nlp.simple Sentence lemma

List of usage examples for edu.stanford.nlp.simple Sentence lemma

Introduction

This page collects usage examples for the lemma method of the edu.stanford.nlp.simple.Sentence class.

Prototype

public String lemma(int index) 

Source Link

Usage

From source file:algo2.Algo2.java

/**
 * Reads the text of every {@code <P>} element in {@code input.xml}, runs NER over each
 * paragraph, and writes the distinct person, location, organization and date phrases
 * (built from token lemmas) to {@code outputFile.txt}.
 *
 * <p>For every token tagged with one of the four entity types, a phrase is built from that
 * token plus the directly following tokens carrying the same tag (at most 5 tokens in total,
 * 9 for organizations). A phrase that is a substring of the previously built phrase is not
 * recorded; because the scan advances one token at a time, this is what suppresses the
 * trailing fragments of a multi-token entity (e.g. "York" right after "New York").
 *
 * <p>Any exception is reported on standard output as {@code "Exception: ..."}.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {

    try {
        List<String> paragraphs = new ArrayList<>();
        Set<String> locations = new LinkedHashSet<>();
        Set<String> organizations = new LinkedHashSet<>();
        Set<String> dates = new LinkedHashSet<>();
        Set<String> persons = new LinkedHashSet<>();

        File file = new File("input.xml");
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        org.w3c.dom.Document doc = dBuilder.parse(file);

        // Normalize the tree under the root element before querying it.
        doc.getDocumentElement().normalize();

        NodeList paragraphNodes = doc.getElementsByTagName("P");
        int length = paragraphNodes.getLength();
        for (int j = 0; j < length; j++) {
            Node node = paragraphNodes.item(j);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                paragraphs.add(((Element) node).getTextContent());
            }
        }

        // try-with-resources: the writer is closed even if NER processing throws.
        try (PrintWriter writer = new PrintWriter("outputFile.txt")) {
            // Deliberately shared across paragraphs and entity types.
            String prevWord = "";

            for (String paragraph : paragraphs) {
                Sentence sentence = new Sentence(paragraph);
                List<String> nerTags = sentence.nerTags();

                for (int i = 0; i < nerTags.size(); i++) {
                    String tag = nerTags.get(i);
                    String type;
                    Set<String> target;
                    int maxTokens;

                    if (tag.equalsIgnoreCase("location")) {
                        type = "location";
                        target = locations;
                        maxTokens = 5;
                    } else if (tag.equalsIgnoreCase("organization")) {
                        type = "organization";
                        target = organizations;
                        maxTokens = 9;
                    } else if (tag.equalsIgnoreCase("date")) {
                        type = "date";
                        target = dates;
                        maxTokens = 5;
                    } else if (tag.equalsIgnoreCase("person")) {
                        type = "person";
                        target = persons;
                        maxTokens = 5;
                    } else {
                        continue;
                    }

                    String word = collectPhrase(sentence, nerTags, i, type, maxTokens);

                    // Skip phrases already contained in the previous one; prevWord is
                    // updated either way so the comparison always uses the latest phrase.
                    if (!prevWord.contains(word)) {
                        target.add(word);
                    }
                    prevWord = word;
                }
            }

            printSection(writer, "\nPersons:\n---------------", persons);
            writer.println();

            printSection(writer, "Locations:\n--------------", locations);
            writer.println();

            printSection(writer, "\nOrganizations:\n----------------", organizations);
            writer.println();

            printSection(writer, "\nDates:\n---------------", dates);
        }

    } catch (Exception ex) {
        System.out.println("Exception: " + ex);
    }

}

/**
 * Joins the lemma at {@code start} with the lemmas of the directly following tokens whose
 * NER tag equals {@code type} (ignoring case), separated by single spaces.
 *
 * <p>The scan never reads past the end of {@code nerTags}, so an entity at the end of a
 * sentence no longer triggers an {@code IndexOutOfBoundsException}.
 *
 * @param sentence  sentence supplying the lemmas
 * @param nerTags   NER tags of {@code sentence}, one per token
 * @param start     index of the first token of the phrase
 * @param type      entity type the following tokens must carry to be appended
 * @param maxTokens maximum number of tokens in the phrase, including the first
 * @return the space-joined lemmas of the collected tokens
 */
private static String collectPhrase(Sentence sentence, List<String> nerTags, int start,
        String type, int maxTokens) {
    String phrase = sentence.lemma(start);
    int limit = Math.min(start + maxTokens, nerTags.size());
    for (int k = start + 1; k < limit && nerTags.get(k).equalsIgnoreCase(type); k++) {
        phrase = phrase + " " + sentence.lemma(k);
    }
    return phrase;
}

/**
 * Writes {@code header} followed by each entry of {@code entries} on its own line.
 *
 * @param writer  destination writer
 * @param header  header text printed before the entries
 * @param entries entries to print, in iteration order
 */
private static void printSection(PrintWriter writer, String header, Set<String> entries) {
    writer.println(header);
    for (String entry : entries) {
        writer.println(entry);
    }
}

From source file:org.lambda3.text.simplification.discourse.utils.words.WordsUtils.java

License:Open Source License

/**
 * Builds a new {@code Word} from the lemma of the given word.
 *
 * <p>The word's value is wrapped in a {@code Sentence} and the lemma of its first token
 * (index 0) is used as the value of the returned {@code Word}.
 *
 * @param word the word whose value is lemmatized
 * @return a new {@code Word} constructed from the lemma at index 0
 */
public static Word lemmatize(Word word) {
    final String surfaceForm = word.value();
    final String firstLemma = new Sentence(surfaceForm).lemma(0);
    return new Word(firstLemma);
}