Usage examples for edu.stanford.nlp.ie.AbstractSequenceClassifier#makePlainTextReaderAndWriter
public DocumentReaderAndWriter<IN> makePlainTextReaderAndWriter()
From source file: fire.NERDemo.java
public static void main(String[] args) throws Exception { String serializedClassifier = "C:\\Users\\DIPANAKR\\Desktop\\Satanu\\fire\\stanford-ner-2015-04-20\\stanford-ner-2015-04-20\\classifiers\\english.all.3class.distsim.crf.ser.gz"; if (args.length > 0) { serializedClassifier = args[0];// w w w.j av a 2 s .c om } AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(serializedClassifier); /* For either a file to annotate or for the hardcoded text example, this demo file shows several ways to process the input, for teaching purposes. */ if (args.length > 1) { /* For the file, it shows (1) how to run NER on a String, (2) how to get the entities in the String with character offsets, and (3) how to run NER on a whole file (without loading it into a String). */ String fileContents = IOUtils.slurpFile(args[1]); List<List<CoreLabel>> out = classifier.classify(fileContents); for (List<CoreLabel> sentence : out) { for (CoreLabel word : sentence) { System.out.print(word.word() + '/' + word.get(CoreAnnotations.AnswerAnnotation.class) + ' '); } System.out.println(); } System.out.println("---"); out = classifier.classifyFile(args[1]); for (List<CoreLabel> sentence : out) { for (CoreLabel word : sentence) { System.out.print(word.word() + '/' + word.get(CoreAnnotations.AnswerAnnotation.class) + ' '); } System.out.println(); } System.out.println("---"); List<Triple<String, Integer, Integer>> list = classifier.classifyToCharacterOffsets(fileContents); for (Triple<String, Integer, Integer> item : list) { System.out.println(item.first() + ": " + fileContents.substring(item.second(), item.third())); } System.out.println("---"); System.out.println("Ten best entity labelings"); DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter(); classifier.classifyAndWriteAnswersKBest(args[1], 10, readerAndWriter); System.out.println("---"); System.out.println("Per-token marginalized probabilities"); classifier.printProbs(args[1], 
readerAndWriter); // -- This code prints out the first order (token pair) clique probabilities. // -- But that output is a bit overwhelming, so we leave it commented out by default. // System.out.println("---"); // System.out.println("First Order Clique Probabilities"); // ((CRFClassifier) classifier).printFirstOrderProbs(args[1], readerAndWriter); } else { /* For the hard-coded String, it shows how to run it on a single sentence, and how to do this and produce several formats, including slash tags and an inline XML output format. It also shows the full contents of the {@code CoreLabel}s that are constructed by the classifier. And it shows getting out the probabilities of different assignments and an n-best list of classifications with probabilities. */ String[] example = { "Good afternoon Rajat Raina, how are you today?", "I go to school at Stanford University, which is located in California." }; for (String str : example) { System.out.println(classifier.classifyToString(str)); } System.out.println("---"); for (String str : example) { // This one puts in spaces and newlines between tokens, so just print not println. System.out.print(classifier.classifyToString(str, "slashTags", false)); } System.out.println("---"); for (String str : example) { // This one is best for dealing with the output as a TSV (tab-separated column) file. 
// The first column gives entities, the second their classes, and the third the remaining text in a document System.out.print(classifier.classifyToString(str, "tabbedEntities", false)); } System.out.println("---"); for (String str : example) { System.out.println(classifier.classifyWithInlineXML(str)); } System.out.println("---"); for (String str : example) { System.out.println(classifier.classifyToString(str, "xml", true)); } System.out.println("---"); for (String str : example) { System.out.print(classifier.classifyToString(str, "tsv", false)); } System.out.println("---"); // This gets out entities with character offsets int j = 0; for (String str : example) { j++; List<Triple<String, Integer, Integer>> triples = classifier.classifyToCharacterOffsets(str); for (Triple<String, Integer, Integer> trip : triples) { System.out.printf("%s over character offsets [%d, %d) in sentence %d.%n", trip.first(), trip.second(), trip.third, j); } } System.out.println("---"); // This prints out all the details of what is stored for each token int i = 0; for (String str : example) { for (List<CoreLabel> lcl : classifier.classify(str)) { for (CoreLabel cl : lcl) { System.out.print(i++ + ": "); System.out.println(cl.toShorterString()); } } } System.out.println("---"); } }