Example usage for edu.stanford.nlp.simple Sentence nerTags

List of usage examples for edu.stanford.nlp.simple Sentence nerTags

Introduction

On this page you can find example usage for the nerTags method of edu.stanford.nlp.simple.Sentence.

Prototype

public List<String> nerTags() 

Source Link

Usage

From source file: algo2.Algo2.java

/**
 * Extracts named entities from {@code input.xml} and writes them, grouped by
 * category, to {@code outputFile.txt}.
 *
 * <p>The text content of every {@code P} element is wrapped in a {@link Sentence}
 * and tagged with {@code nerTags()}. Consecutive tokens carrying the same tag
 * (location, organization, date or person) are merged into one entity made of
 * their lemmas separated by single spaces. Entities are kept in first-seen order
 * and duplicates are dropped.
 *
 * <p>Any exception is reported on standard output as {@code "Exception: ..."};
 * the method itself never throws.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {

    try {
        // Longest run of tokens merged into a single entity. Organizations get
        // a wider window than the other categories.
        final int defaultMaxTokens = 5;
        final int organizationMaxTokens = 9;

        List<String> paragraphs = new ArrayList<>();
        Set<String> locations = new LinkedHashSet<>();
        Set<String> organizations = new LinkedHashSet<>();
        Set<String> dates = new LinkedHashSet<>();
        Set<String> persons = new LinkedHashSet<>();

        File file = new File("input.xml");
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        org.w3c.dom.Document doc = dBuilder.parse(file);

        // Normalize the tree under the root element before reading text content.
        doc.getDocumentElement().normalize();

        NodeList paragraphNodes = doc.getElementsByTagName("P");
        int length = paragraphNodes.getLength();
        for (int j = 0; j < length; j++) {
            Node node = paragraphNodes.item(j);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                paragraphs.add(((Element) node).getTextContent());
            }
        }

        // try-with-resources closes the writer even when tagging or writing fails.
        try (PrintWriter writer = new PrintWriter("outputFile.txt")) {
            // Most recent entity span seen, shared across categories and paragraphs.
            String prevWord = "";

            for (String paragraph : paragraphs) {
                Sentence sentence = new Sentence(paragraph);
                List<String> nerTags = sentence.nerTags();

                for (int i = 0; i < nerTags.size(); i++) {
                    String tag = nerTags.get(i);
                    String label;
                    Set<String> target;
                    int maxTokens = defaultMaxTokens;

                    if (tag.equalsIgnoreCase("location")) {
                        label = "location";
                        target = locations;
                    } else if (tag.equalsIgnoreCase("organization")) {
                        label = "organization";
                        target = organizations;
                        maxTokens = organizationMaxTokens;
                    } else if (tag.equalsIgnoreCase("date")) {
                        label = "date";
                        target = dates;
                    } else if (tag.equalsIgnoreCase("person")) {
                        label = "person";
                        target = persons;
                    } else {
                        // Not an entity category of interest; prevWord is left untouched.
                        continue;
                    }

                    String word = joinEntityLemmas(sentence, nerTags, i, label, maxTokens);

                    // When i advances to the next token of an entity that was just
                    // recorded, the span starting there is a tail of the previous
                    // span; the contains() check keeps those tails out of the output.
                    if (!prevWord.contains(word)) {
                        target.add(word);
                    }
                    prevWord = word;
                }
            }

            writer.println("\nPersons:\n---------------");
            for (String str : persons) {
                writer.println(str);
            }
            writer.println();

            writer.println("Locations:\n--------------");
            for (String str : locations) {
                writer.println(str);
            }
            writer.println();

            writer.println("\nOrganizations:\n----------------");
            for (String str : organizations) {
                writer.println(str);
            }
            writer.println();

            writer.println("\nDates:\n---------------");
            for (String str : dates) {
                writer.println(str);
            }
        }

    } catch (Exception ex) {
        System.out.println("Exception: " + ex);
    }

}

/**
 * Joins the lemmas of the run of tokens that starts at {@code start} and carries
 * the tag {@code label}.
 *
 * <p>The run ends at the first token with a different tag, after
 * {@code maxTokens} tokens, or at the end of the tag list, whichever comes
 * first. Stopping at the end of the list is what keeps an entity in the final
 * tokens of a paragraph from raising {@link IndexOutOfBoundsException}.
 *
 * @param sentence  sentence that supplies the lemma of each token
 * @param nerTags   NER tag of each token of {@code sentence}
 * @param start     index of the first token of the run
 * @param label     tag the following tokens must match, ignoring case
 * @param maxTokens maximum number of tokens to merge, including the first
 * @return the lemmas of the run separated by single spaces
 */
private static String joinEntityLemmas(Sentence sentence, List<String> nerTags, int start,
        String label, int maxTokens) {
    StringBuilder word = new StringBuilder().append(sentence.lemma(start));
    int end = Math.min(nerTags.size(), start + maxTokens);
    for (int k = start + 1; k < end && nerTags.get(k).equalsIgnoreCase(label); k++) {
        word.append(' ').append(sentence.lemma(k));
    }
    return word.toString();
}