List of usage examples for edu.stanford.nlp.util StringUtils argsToProperties
public static Properties argsToProperties(String... args)
From source file:DependencyParse.java
License:Apache License
public static void main(String[] args) throws Exception { Properties props = StringUtils.argsToProperties(args); if (!props.containsKey("tokpath") || !props.containsKey("parentpath") || !props.containsKey("relpath")) { System.err.println(//from ww w. j a va 2 s . c o m "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>"); System.exit(1); } boolean tokenize = false; if (props.containsKey("tokenize")) { tokenize = true; } String tokPath = props.getProperty("tokpath"); String parentPath = props.getProperty("parentpath"); String relPath = props.getProperty("relpath"); BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath)); BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath)); BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath)); MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL); DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL); Scanner stdin = new Scanner(System.in); int count = 0; long start = System.currentTimeMillis(); while (stdin.hasNextLine()) { String line = stdin.nextLine(); List<HasWord> tokens = new ArrayList<>(); if (tokenize) { PTBTokenizer<Word> tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), ""); for (Word label; tokenizer.hasNext();) { tokens.add(tokenizer.next()); } } else { for (String word : line.split(" ")) { tokens.add(new Word(word)); } } List<TaggedWord> tagged = tagger.tagSentence(tokens); int len = tagged.size(); Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies(); int[] parents = new int[len]; for (int i = 0; i < len; i++) { // if a node has a parent of -1 at the end of parsing, then the node // has no parent. 
parents[i] = -1; } String[] relns = new String[len]; for (TypedDependency td : tdl) { // let root have index 0 int child = td.dep().index(); int parent = td.gov().index(); relns[child - 1] = td.reln().toString(); parents[child - 1] = parent; } // print tokens StringBuilder sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { if (tokenize) { sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word())); } else { sb.append(tokens.get(i).word()); } sb.append(' '); } if (tokenize) { sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word())); } else { sb.append(tokens.get(len - 1).word()); } sb.append('\n'); tokWriter.write(sb.toString()); // print parent pointers sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { sb.append(parents[i]); sb.append(' '); } sb.append(parents[len - 1]); sb.append('\n'); parentWriter.write(sb.toString()); // print relations sb = new StringBuilder(); for (int i = 0; i < len - 1; i++) { sb.append(relns[i]); sb.append(' '); } sb.append(relns[len - 1]); sb.append('\n'); relWriter.write(sb.toString()); count++; if (count % 1000 == 0) { double elapsed = (System.currentTimeMillis() - start) / 1000.0; System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); } } long totalTimeMillis = System.currentTimeMillis() - start; System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); tokWriter.close(); parentWriter.close(); relWriter.close(); }
From source file:ConstituencyParse.java
License:Apache License
public static void main(String[] args) throws Exception { Properties props = StringUtils.argsToProperties(args); if (!props.containsKey("parentpath")) { System.err.println(/* ww w.java 2s.co m*/ "usage: java ConstituencyParse -deps - -tokenize - -tokpath <tokpath> -parentpath <parentpath>"); System.exit(1); } // whether to tokenize input sentences boolean tokenize = false; if (props.containsKey("tokenize")) { tokenize = true; } // whether to produce dependency trees from the constituency parse boolean deps = false; if (props.containsKey("deps")) { deps = true; } String tokPath = props.containsKey("tokpath") ? props.getProperty("tokpath") : null; String parentPath = props.getProperty("parentpath"); ConstituencyParse processor = new ConstituencyParse(tokPath, parentPath, tokenize); Scanner stdin = new Scanner(System.in); int count = 0; long start = System.currentTimeMillis(); while (stdin.hasNextLine()) { String line = stdin.nextLine(); List<HasWord> tokens = processor.sentenceToTokens(line); Tree parse = processor.parse(tokens); // produce parent pointer representation int[] parents = deps ? processor.depTreeParents(parse, tokens) : processor.constTreeParents(parse); // print if (tokPath != null) { processor.printTokens(tokens); } processor.printParents(parents); count++; if (count % 1000 == 0) { double elapsed = (System.currentTimeMillis() - start) / 1000.0; System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed); } } long totalTimeMillis = System.currentTimeMillis() - start; System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0, totalTimeMillis / (double) count); processor.close(); }
From source file:com.koalephant.nlp.RegexNERValidator.java
License:Open Source License
/** * Main method/* w ww .j a va2 s . co m*/ * * @param args program arguments, mostly -properties * @throws RuntimeException */ public static void main(String args[]) throws RuntimeException { if (args.length == 0) { throw new RuntimeException("No arguments specified"); } Properties properties = StringUtils.argsToProperties(args); if (properties.containsKey("inFile")) { if (properties.containsKey("outFile")) { quoteMappings(properties.getProperty("inFile"), properties.getProperty("outFile")); } else { quoteMappings(properties.getProperty("inFile")); } } else { testMappings(properties); } }
From source file:com.koalephant.nlp.StanfordCoreNLPHTTPServer.java
License:Open Source License
/** * Start the Server/*from w w w .jav a 2 s . c o m*/ * * @param args command line arguments * @throws Exception */ public static void main(String args[]) throws Exception { Properties props = new Properties(); // Load properties from the command line if (args.length > 0) { log.info("Reading Opts..."); props = StringUtils.argsToProperties(args); } // use -host if given host = props.getProperty("host", host); props.remove("host"); String portOpt = props.getProperty("port"); // use -port if given if (portOpt != null) { try { props.remove("port"); port = Integer.parseInt(portOpt); } catch (Exception e) { System.err.println("Invalid port specified: " + portOpt); System.exit(1); } } String defaultTypeStr = props.getProperty("defaultType"); if (defaultTypeStr != null) { try { defaultType = MediaType.getFromType(defaultTypeStr, defaultType); } catch (IllegalArgumentException e) { System.err.println(e.getMessage()); System.exit(1); } } // start the server Container container = new StanfordCoreNLPHTTPServer(new StanfordCoreNLP(props)); log.info("Attempting to listen on " + host + ":" + port + "."); Server server = new ContainerServer(container); Connection connection = new SocketConnection(server); SocketAddress address = new InetSocketAddress(host, port); connection.connect(address); log.info("Initialized server at " + host + ":" + port + "."); }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger.java
License:Open Source License
/**
 * Initializes this UIMA component: sets up the lazy model provider that resolves
 * and loads the Stanford MaxentTagger model, and the provider that maps tagger
 * output to the DKPro POS type system.
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    modelProvider = new ModelProviderBase<MaxentTagger>() {
        {
            // Resolve the model coordinates from component parameters, falling back
            // to the Maven-artifact-style defaults below when no override is set.
            setContextObject(StanfordPosTagger.this);

            setDefault(ARTIFACT_ID, "${groupId}.stanfordnlp-model-tagger-${language}-${variant}");
            setDefault(LOCATION, "classpath:/${package}/lib/tagger-${language}-${variant}.properties");
            setDefaultVariantsLocation("${package}/lib/tagger-default-variants.map");

            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        @Override
        protected MaxentTagger produceResource(URL aUrl) throws IOException {
            String modelFile = aUrl.toString();
            // Load the tagger from the resolved URL; the boolean flag presumably
            // suppresses verbose loading output — TODO confirm against the
            // MaxentTagger(String, Properties, boolean) javadoc.
            MaxentTagger tagger = new MaxentTagger(modelFile,
                    StringUtils.argsToProperties(new String[] { "-model", modelFile }), false);

            // Register the model's tagset with the component so downstream
            // consumers can inspect it.
            SingletonTagset tags = new SingletonTagset(POS.class,
                    getResourceMetaData().getProperty(("pos.tagset")));
            tags.addAll(tagger.tagSet());
            addTagset(tags);

            if (printTagSet) {
                getContext().getLogger().log(INFO, getTagset().toString());
            }

            return tagger;
        }
    };

    posMappingProvider = MappingProviderFactory.createPosMappingProvider(posMappingLocation, language,
            modelProvider);
    posMappingProvider.setDefaultVariantsLocation(
            "de/tudarmstadt/ukp/dkpro/core/stanfordnlp/lib/tagger-default-variants.map");
}
From source file:edu.cmu.geolocator.nlp.ner.StanfordCRF.CRF_Learn.java
License:Apache License
/**
 * Trains (or loads) a Stanford CRF classifier and runs whichever test/serialize
 * actions the flags in the properties file request.
 *
 * <p>NOTE(review): the real command-line arguments ({@code argc}) are ignored —
 * the properties file path is hard-coded below. The branch structure mirrors
 * CRFClassifier's own main(): exactly one of load/train runs, then any number of
 * serialize/test/classify actions may follow.
 */
public static void main(String[] argc) throws Exception {
    //StringUtils.printErrInvocationString("CRFClassifier", args);
    // Hard-coded invocation: equivalent to "-prop <train.prop>".
    String[] args = new String[2];
    args[0] = "-prop";
    args[1] = "src/edu/cmu/geoparser/nlptools/ner/StanfordCRF/train.prop";
    Properties props = StringUtils.argsToProperties(args);
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props);

    // Snapshot the flags that drive the action branches below.
    String testFile = crf.flags.testFile;
    String testFiles = crf.flags.testFiles;
    String textFile = crf.flags.textFile;
    String textFiles = crf.flags.textFiles;
    String loadPath = crf.flags.loadClassifier;
    String loadTextPath = crf.flags.loadTextClassifier;
    String serializeTo = crf.flags.serializeTo;
    String serializeToText = crf.flags.serializeToText;

    // Obtain a model: load a serialized/text/jar classifier if configured,
    // otherwise train from scratch, otherwise fall back to the default model.
    if (loadPath != null) {
        crf.loadClassifierNoExceptions(loadPath, props);
    } else if (loadTextPath != null) {
        System.err.println("Warning: this is now only tested for Chinese Segmenter");
        System.err.println("(Sun Dec 23 00:59:39 2007) (pichuan)");
        try {
            crf.loadTextClassifier(loadTextPath, props);
            // System.err.println("DEBUG: out from crf.loadTextClassifier");
        } catch (Exception e) {
            throw new RuntimeException("error loading " + loadTextPath, e);
        }
    } else if (crf.flags.loadJarClassifier != null) {
        crf.loadJarClassifier(crf.flags.loadJarClassifier, props);
    } else if (crf.flags.trainFile != null || crf.flags.trainFileList != null) {
        //Wei Zhang: This is where the program starts to train.
        crf.train();
    } else {
        crf.loadDefaultClassifier();
    }

    // System.err.println("Using " + crf.flags.featureFactory);
    // System.err.println("Using " +
    // StringUtils.getShortClassName(crf.readerAndWriter));

    // Persist the (possibly freshly trained) model.
    if (serializeTo != null) {
        //Wei Zhang: This is used.
        crf.serializeClassifier(serializeTo);
    }
    if (serializeToText != null) {
        crf.serializeTextClassifier(serializeToText);
    }

    // Evaluate on a single test file; the sub-branches select the output mode.
    if (testFile != null) {
        DocumentReaderAndWriter<CoreLabel> readerAndWriter = crf.defaultReaderAndWriter();
        if (crf.flags.searchGraphPrefix != null) {
            crf.classifyAndWriteViterbiSearchGraph(testFile, crf.flags.searchGraphPrefix,
                    crf.makeReaderAndWriter());
        } else if (crf.flags.printFirstOrderProbs) {
            crf.printFirstOrderProbs(testFile, readerAndWriter);
        } else if (crf.flags.printProbs) {
            crf.printProbs(testFile, readerAndWriter);
        } else if (crf.flags.useKBest) {
            int k = crf.flags.kBest;
            crf.classifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
        } else if (crf.flags.printLabelValue) {
            crf.printLabelInformation(testFile, readerAndWriter);
        } else {
            crf.classifyAndWriteAnswers(testFile, readerAndWriter);
        }
    }

    // Evaluate on a comma-separated list of test files.
    if (testFiles != null) {
        List<File> files = new ArrayList<File>();
        for (String filename : testFiles.split(",")) {
            files.add(new File(filename));
        }
        crf.classifyAndWriteAnswers(files, crf.defaultReaderAndWriter());
    }

    // Classify plain-text input (single file or comma-separated list).
    if (textFile != null) {
        crf.classifyAndWriteAnswers(textFile);
    }
    if (textFiles != null) {
        List<File> files = new ArrayList<File>();
        for (String filename : textFiles.split(",")) {
            files.add(new File(filename));
        }
        crf.classifyAndWriteAnswers(files);
    }

    if (crf.flags.readStdin) {
        crf.classifyStdin();
    }
}
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/** * Needs the following properties: -props 'Location of coref.properties' * * @throws Exception/*from ww w .ja va2 s .c om*/ */ public static void main(String[] args) throws Exception { Properties props = StringUtils.argsToProperties(args); initializeAndRunCoref(props); }
From source file:lv.lumii.ner.analysis.ClassifierComparator.java
License:Open Source License
public static void main(String[] args) throws ClassCastException, ClassNotFoundException, IOException { Properties props = new Properties(); props = StringUtils.argsToProperties(args); inputFile = props.getProperty("inputFile", null); String propString = "" + "-loadClassifier lv-ner-model.ser.gz" //+ " -whiteList Gazetteer/DB_persons.txt,Gazetteer/DB_locations.txt,Gazetteer/DB_professions.txt,Gazetteer/Laura_partijas_lem.txt,Gazetteer/AZ_valsts_parvaldes_struktura_lem.txt,D:/LUMII/ner/improve_9.1.2014/leta-2014-jan/export/stats/DB_organizations2.txt" //+ "-loadClassifier D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-1.ser.gz;D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-2.ser.gz;D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-3.ser.gz" //";D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-4.ser.gz;D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-5.ser.gz" + ""; //props = StringUtils.argsToProperties(propString.split(" ")); System.err.println(props);/* www. j a v a 2s . 
com*/ ClassifierComparator cc = new ClassifierComparator(props); for (Integer i = 0; i < 10; i++) { if (props.containsKey("classifier" + i.toString())) { System.err.println("classifier" + i.toString()); String[] parts = props.getProperty("classifier" + i.toString()).split("\\s*\\|\\s*"); List<AbstractSequenceClassifier<CoreLabel>> nccl = new ArrayList<>(); for (String part : parts) { System.err.println("\t" + part); String[] items = part.split("\\s*=\\s*"); System.err.println("\t" + part); if (items[0].equalsIgnoreCase("whiteList")) { System.err.println("\twhiteList" + items[1]); nccl.add(new ListNERSequenceClassifier(items[1], true, true, true)); } if (items[0].equalsIgnoreCase("loadClassifier")) { System.err.println("\tloadClassifier" + items[1]); nccl.add(CRFClassifier.getClassifier(items[1], props)); } if (items[0].equalsIgnoreCase("regexList")) { System.err.println("\tregexListt" + items[1]); nccl.add(new RegexNERSequenceClassifier(items[1], true, true)); } } cc.addClassifier(nccl); } } if (props.containsKey("whiteList")) { String whiteListString = props.getProperty("whiteList"); String whiteLists[] = whiteListString.split(";"); for (String whiteList : whiteLists) { whiteList = whiteList.trim(); cc.addClassifier(new ListNERSequenceClassifier(whiteList, true, true, true)); } } if (props.containsKey("loadClassifier")) { String loadClassifierString = props.getProperty("loadClassifier"); String loadClassifiers[] = loadClassifierString.split(";"); for (String loadClassifier : loadClassifiers) { loadClassifier = loadClassifier.trim(); cc.addClassifier(CRFClassifier.getClassifier(loadClassifier, props)); } } if (props.containsKey("regexList")) { String regexListString = props.getProperty("regexList"); String regexLists[] = regexListString.split(";"); for (String regexList : regexLists) { regexList = regexList.trim(); cc.addClassifier(new RegexNERSequenceClassifier(regexList, true, true)); } } LVCoNLLDocumentReaderAndWriter reader = new 
LVCoNLLDocumentReaderAndWriter(); //List<CoreLabel> doc = reader.readCONLL("D:/LUMII/ner/improve_9.1.2014/leta-2014-jan/export/FB1A480C-5109-4D34-AFDF-FD4B9CC6E790.conll"); //inputFile = "D:/LUMII/ner/improve_9.1.2014/leta-2014-jan/export/leta-2014-jan-test.tab"; List<CoreLabel> doc = reader.readCONLL(inputFile); cc.classify(doc); reader.outputType = LVCoNLLDocumentReaderAndWriter.outputTypes.COMPARE; //reader.printAnswers(cc.document, new PrintWriter(System.out)); cc.entity_stats().preview(); }
From source file:lv.lumii.ner.NerPipe.java
License:Open Source License
public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException { Properties props = new Properties(); props = StringUtils.argsToProperties(args); if (props.containsKey("h") || props.containsKey("help") || props.containsKey("?")) { System.out.println("LV Named Entity Recogniser"); System.out.println("\nInput formats"); System.out.println("\tDefault : conll-in"); System.out.println(/*from w w w .j a v a 2s. c o m*/ "\t-conll-in : CONLL shared task data format - one line per token, with tab-delimited columns, sentences separated by blank lines."); System.out.println("\nOutput formats"); System.out.println("\tDefault : conll-x"); System.out.println( "\t-conll-x : CONLL-X shared task data format - one line per token, with tab-delimited columns, sentences separated by blank lines."); ; System.out.println("\t-simple : Simple compare format used for ner analysis"); System.out.println("\t-toFeatures : add ner key and value to morphoFeature string"); System.out.println("\nOther options:"); System.out.println("\t-saveExtraColumns : save extra columns after typical conll input (6 columns)"); System.out.println("\t-whiteList : files containing white list named entities (separated by comma)"); System.out.flush(); System.exit(0); } NerPipe ner = new NerPipe(props); ner.classifyDocumentStdin(); //CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(NerPipe.defaultCrfClassifier, props); ///LVCoNLLDocumentReaderAndWriter lvconll = new LVCoNLLDocumentReaderAndWriter(); //lvconll.init(classifier.flags); // ObjectBank<List<CoreLabel>> b = classifier.makeObjectBankFromFile("z_tomins.conll",lvconll); // classifier.printProbsDocuments(b); // classifier.printFirstOrderProbsDocuments(b); //try {classifier.printLabelInformation("z_tomins.conll", lvconll);} catch (Exception e) {e.printStackTrace();} }
From source file:LVCoref.LVCoref.java
License:Open Source License
/**
 * Entry point for the LV coreference pipeline.
 *
 * <p>Modes, in priority order: directory scoring (Constants.SCORE), batch folder
 * annotation (Constants.TAG_FOLDER), then document processing driven by
 * {@code inputType}: a single CONLL file, JSON documents streamed from stdin, or
 * (default) CONLL documents streamed from stdin.
 */
public static void main(String[] args) throws Exception {
    Properties properties = StringUtils.argsToProperties(args);
    initializeProperties(properties);

    // Scoring / batch-annotation modes short-circuit the normal pipeline.
    if (Constants.SCORE) {
        scoreDirectory(Constants.SCORE_PATH);
        return;
    }
    if (Constants.TAG_FOLDER) {
        annotateFolder(Constants.TAG_FOLDER_PATH, Constants.TAG_FOLDER_OUT);
        return;
    }

    int docID = 0; // document counter
    BufferedReader in;
    switch (inputType) {
    case CONLL:
        // Single CONLL file named by conllInput (set via initializeProperties).
        Document d = new Document(dictionaries);
        try {
            d.readCONLL(conllInput);
        } catch (Exception ex) {
            log.severe("Could not read conll file");
            ex.printStackTrace();
            break;
        }
        if (d.tree.size() > 0)
            processDocument(d);
        break;
    case STDIN_JSON:
        // Stream JSON documents from stdin until a read fails or an empty
        // document (tree.size() == 0) signals end of input.
        in = new BufferedReader(new InputStreamReader(System.in, "UTF8"));
        while (true) {
            Document doc = new Document(dictionaries);
            doc.id = Integer.toString(docID++);
            try {
                doc.readJSON(in);
            } catch (Exception ex) {
                log.severe("Could not read json from stream");
                ex.printStackTrace();
                break;
            }
            if (doc.tree.size() > 0)
                processDocument(doc);
            else
                break;
        }
        break;
    default:
        // Same streaming loop as STDIN_JSON, but reading CONLL records.
        in = new BufferedReader(new InputStreamReader(System.in, "UTF8"));
        while (true) {
            Document doc = new Document(dictionaries);
            doc.id = Integer.toString(docID++);
            try {
                doc.readCONLL(in);
            } catch (Exception ex) {
                log.severe("Could not read conll from stream");
                ex.printStackTrace();
                break;
            }
            if (doc.tree.size() > 0)
                processDocument(doc);
            else
                break;
        }
    }
}