List of usage examples for the edu.stanford.nlp.ling.Word constructor Word
public Word(String word, int beginPosition, int endPosition)
From source file: RBBNPE.POSBasedBaseNounPhraseExtractor.java
License: Open Source License
/** * Extracts all base noun phrases from a given file in the CoNLL data format. * Results are saved internally and are available for output or saving * The tokens have to be in the first column * Columns have to be either seperated by a whitespace or a tab * * @param path absolute path to the CoNLL File * @throws IOException/*from w w w . j a v a2 s . com*/ */ public void extractBaseNounPhrasesFromCoNLLData(String path) throws IOException { dictionaryWithTaggedSentenceForBaseNP = new HashMap<BaseNounPhrase, List<TaggedWord>>(); List<List<HasWord>> sentences = new ArrayList(); List<HasWord> sentence = new ArrayList<HasWord>(); BufferedReader br = new BufferedReader(new FileReader(path)); String currentLine; int currentStartPosition = 0; while (null != (currentLine = br.readLine())) { if (!currentLine.equals("") && !currentLine.contains("\t\t")) { String[] argumentsInLine = currentLine.split(" "); if (argumentsInLine.length <= 2) { argumentsInLine = currentLine.split("\t"); } String cleanToken = argumentsInLine[0]/*.replace("\\/", "//")*/; int currentEndPosition = currentStartPosition + cleanToken.length() - 1; sentence.add(new Word(cleanToken, currentStartPosition, currentEndPosition)); currentStartPosition = currentEndPosition + 2; } else if (currentLine.equals("") || currentLine.equals("\t\t")) { sentences.add(sentence); sentence = new ArrayList<HasWord>(); } else { System.out.println("Strange Line occured: " + currentLine); } } if (sentence.size() >= 0) { sentences.add(sentence); //saves last Sentence, when no empty line follows it } System.out.println("Finished Processing the text"); System.out.println("Starting tagging"); taggedSentences = tagWithPOSTags(sentences); System.out.println("Finished tagging the text"); System.out.println("Starting application of positive rules"); extractedBaseNounPhrases = applyPositiveRules(taggedSentences); System.out.println("Finished application of positive rules"); System.out.println("Sorting extracted phrases"); 
sortExtractedPhrases(); System.out.println("Starting application of rejection rules"); extractedBaseNounPhrases = applyRejectionRules(extractedBaseNounPhrases); System.out.println("Finished application of rejection rules"); }