List of usage examples for edu.stanford.nlp.pipeline StanfordCoreNLP NEWLINE_SPLITTER_PROPERTY
String NEWLINE_SPLITTER_PROPERTY
To view the source code for edu.stanford.nlp.pipeline StanfordCoreNLP NEWLINE_SPLITTER_PROPERTY, click Source Link.
From source file: opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor.java
License: Apache License
/** Runs the tree-based sentiment model on some text. */ public void processTextWithArgs(String[] args) throws IOException { String parserModel = null;// w w w . j a v a 2 s . co m String sentimentModel = null; String filename = null; String fileList = null; boolean stdin = false; boolean filterUnknown = false; List<Output> outputFormats = Collections.singletonList(Output.ROOT); Input inputFormat = Input.TEXT; String tlppClass = "DEFAULT_TLPP_CLASS"; for (int argIndex = 0; argIndex < args.length;) { if (args[argIndex].equalsIgnoreCase("-sentimentModel")) { sentimentModel = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-parserModel")) { parserModel = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-file")) { filename = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-fileList")) { fileList = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-stdin")) { stdin = true; argIndex++; } else if (args[argIndex].equalsIgnoreCase("-input")) { inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase()); argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-output")) { String[] formats = args[argIndex + 1].split(","); outputFormats = new ArrayList<>(); for (String format : formats) { outputFormats.add(Output.valueOf(format.toUpperCase())); } argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) { filterUnknown = true; argIndex++; } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) { tlppClass = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-help")) { System.exit(0); } else { log.info("Unknown argument " + args[argIndex + 1]); throw new IllegalArgumentException("Unknown argument " + args[argIndex + 1]); } } // We construct two pipelines. One handles tokenization, if // necessary. The other takes tokenized sentences and converts // them to sentiment trees. 
Properties pipelineProps = new Properties(); Properties tokenizerProps = null; if (sentimentModel != null) { pipelineProps.setProperty("sentiment.model", sentimentModel); } if (parserModel != null) { pipelineProps.setProperty("parse.model", parserModel); } if (inputFormat == Input.TREES) { pipelineProps.setProperty("annotators", "binarizer, sentiment"); pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator"); pipelineProps.setProperty("binarizer.tlppClass", tlppClass); pipelineProps.setProperty("enforceRequirements", "false"); } else { pipelineProps.setProperty("annotators", "parse, sentiment"); pipelineProps.setProperty("enforceRequirements", "false"); tokenizerProps = new Properties(); tokenizerProps.setProperty("annotators", "tokenize, ssplit"); } if (stdin && tokenizerProps != null) { tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true"); } int count = 0; if (filename != null) count++; if (fileList != null) count++; if (stdin) count++; if (count > 1) { throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin"); } if (count == 0) { throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin"); } StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps); StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps); if (filename != null) { // Process a file. The pipeline will do tokenization, which // means it will split it into sentences as best as possible // with the tokenizer. List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown); for (Annotation annotation : annotations) { pipeline.annotate(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { System.out.println(sentence); outputTree(System.out, sentence, outputFormats); } } } else if (fileList != null) { // Process multiple files. 
The pipeline will do tokenization, // which means it will split it into sentences as best as // possible with the tokenizer. Output will go to filename.out // for each file. for (String file : fileList.split(",")) { List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown); FileOutputStream fout = new FileOutputStream(file + ".out"); PrintStream pout = new PrintStream(fout); for (Annotation annotation : annotations) { pipeline.annotate(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { pout.println(sentence); outputTree(pout, sentence, outputFormats); } } pout.flush(); fout.close(); } } else { // Process stdin. Each line will be treated as a single sentence. log.info("Reading in text from stdin."); log.info("Please enter one sentence per line."); log.info("Processing will end when EOF is reached."); BufferedReader reader = IOUtils.readerFromStdin("utf-8"); for (String line; (line = reader.readLine()) != null;) { line = line.trim(); if (!line.isEmpty()) { Annotation annotation = tokenizer.process(line); pipeline.annotate(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { outputTree(System.out, sentence, outputFormats); } } else { // Output blank lines for blank lines so the tool can be // used for line-by-line text processing System.out.println(); } } } }