Example usage for edu.stanford.nlp.ling Word Word

List of usage examples for edu.stanford.nlp.ling Word Word

Introduction

This page shows example usage of the Word constructor of the edu.stanford.nlp.ling.Word class.

Prototype

public Word(String word, int beginPosition, int endPosition) 

Source Link

Document

Construct a new word, with the given value.

Usage

From source file:RBBNPE.POSBasedBaseNounPhraseExtractor.java

License:Open Source License

/**
 * Extracts all base noun phrases from a given file in the CoNLL data format.
 * Results are saved internally and are available for output or saving
 * The tokens have to be in the first column
 * Columns have to be either seperated by a whitespace or a tab
 *
 * @param path absolute path to the CoNLL File
 * @throws IOException/*from w  w  w . j a  v a2  s  .  com*/
 */
public void extractBaseNounPhrasesFromCoNLLData(String path) throws IOException {

    dictionaryWithTaggedSentenceForBaseNP = new HashMap<BaseNounPhrase, List<TaggedWord>>();

    List<List<HasWord>> sentences = new ArrayList();
    List<HasWord> sentence = new ArrayList<HasWord>();

    BufferedReader br = new BufferedReader(new FileReader(path));
    String currentLine;
    int currentStartPosition = 0;

    while (null != (currentLine = br.readLine())) {

        if (!currentLine.equals("") && !currentLine.contains("\t\t")) {

            String[] argumentsInLine = currentLine.split(" ");

            if (argumentsInLine.length <= 2) {
                argumentsInLine = currentLine.split("\t");
            }

            String cleanToken = argumentsInLine[0]/*.replace("\\/", "//")*/;

            int currentEndPosition = currentStartPosition + cleanToken.length() - 1;

            sentence.add(new Word(cleanToken, currentStartPosition, currentEndPosition));

            currentStartPosition = currentEndPosition + 2;

        } else if (currentLine.equals("") || currentLine.equals("\t\t")) {
            sentences.add(sentence);
            sentence = new ArrayList<HasWord>();
        } else {
            System.out.println("Strange Line occured: " + currentLine);
        }
    }
    if (sentence.size() >= 0) {
        sentences.add(sentence); //saves last Sentence, when no empty line follows it
    }

    System.out.println("Finished Processing the text");

    System.out.println("Starting tagging");
    taggedSentences = tagWithPOSTags(sentences);
    System.out.println("Finished tagging the text");

    System.out.println("Starting application of positive rules");
    extractedBaseNounPhrases = applyPositiveRules(taggedSentences);
    System.out.println("Finished application of positive rules");

    System.out.println("Sorting extracted phrases");
    sortExtractedPhrases();

    System.out.println("Starting application of rejection rules");
    extractedBaseNounPhrases = applyRejectionRules(extractedBaseNounPhrases);
    System.out.println("Finished application of rejection rules");

}