Example usage for opennlp.tools.postag POSSample getSentence

List of usage examples for opennlp.tools.postag POSSample getSentence

Introduction

On this page you can find example usage for the getSentence method of opennlp.tools.postag.POSSample.

Prototype

public String[] getSentence() 

Source Link

Usage

From source file: com.geocode.service.impl.AddressServiceImpl.java

/**
 * Extracts candidate address/location phrases from free text.
 *
 * <p>The input is read line by line; each line is split on whitespace,
 * POS-tagged with {@code tagger}, and scanned for runs of tokens whose tags
 * qualify as part of a location. Every line is scanned (not just the last
 * one), and a phrase still open at the end of a line is kept.
 *
 * @param input the raw text to scan; must not be {@code null}
 * @return the collected phrases after being passed through
 *         {@code filterLocations}
 * @throws IOException if reading from or closing the line stream fails
 */
@Override
public List<String> extractAddress(String input) throws IOException {
    List<String> locations = new ArrayList<String>();
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(input));

    perfMon.start();
    try {
        // NOTE(review): the cleaned string returned here was never consumed by
        // the original code (the stream above is built from the raw input).
        // The call is kept because it receives `locations` and may populate it;
        // confirm whether the stream should read the cleaned text instead.
        cleanInputString(input, locations);

        String line;
        while ((line = lineStream.read()) != null) {
            String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
            String[] tags = tagger.tag(tokens);

            POSSample sample = new POSSample(tokens, tags);
            String[] sentence = sample.getSentence();
            if (sentence != null) {
                collectLocations(sentence, tags, locations);
            }
            perfMon.incrementCounter();
        }
    } finally {
        // Always release the stream and stop the monitor, even when the input
        // was empty or tagging failed part-way through.
        try {
            lineStream.close();
        } finally {
            perfMon.stopAndPrintFinalResult();
        }
    }
    return filterLocations(locations);
}

/**
 * Scans one tagged sentence and appends every completed location phrase to
 * {@code locations}.
 *
 * @param sent      the sentence tokens
 * @param tags      the POS tag for each token, parallel to {@code sent}
 * @param locations the list that receives completed phrases
 */
private void collectLocations(String[] sent, String[] tags, List<String> locations) {
    String current = null; // phrase being built, or null when none is open

    for (int i = 0; i < tags.length; i++) {
        String tag = tags[i];

        // A number can only open a phrase, and only when the next tag is one
        // of the tags in baseList; otherwise the number is skipped.
        if ("CD".equals(tag) && current == null) {
            if (i + 1 < tags.length && baseList.contains(tags[i + 1])) {
                current = addWordToLocation(current, sent[i]);
            }
            continue;
        }

        // An NN directly after an NNP denotes completion of one address, so it
        // must not be appended even if its tag is in `list`.
        boolean closesAddress = "NN".equals(tag) && i > 0 && "NNP".equals(tags[i - 1]);

        if (!closesAddress && list.contains(tag) && !checkExcludedWords(sent[i])) {
            current = addWordToLocation(current, sent[i]);
            continue;
        }

        // Any other token ends the open phrase; never store a null entry.
        if (current != null) {
            locations.add(current);
            current = null;
        }
    }

    // Keep a phrase that runs up to the end of the sentence.
    if (current != null) {
        locations.add(current);
    }
}