List of usage examples for edu.stanford.nlp.util.StringUtils.argsToProperties
public static Properties argsToProperties(String[] args, Map<String, Integer> flagsToNumArgs)
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
public static void main(String[] args) throws IOException, Exception { String trainAutoFile, trainConllFile, trainPargFile, testAutoFile, testPargFile, testConllFile, outAutoFile, outPargFile, modelFile, embedFile; String home = "/home/ambati/ilcc/projects/parsing/experiments/english/ccg/"; trainAutoFile = home + "data/final/train.gccg.auto"; trainConllFile = home + "data/final/train.accg.conll2"; trainPargFile = home + "data/final/train.gccg.parg"; trainAutoFile = home + "data/final/devel.gccg.auto"; trainConllFile = home + "data/final/devel.accg.conll"; trainPargFile = home + "data/final/devel.gccg.parg"; //testAutoFile = home+"data/final/devel.gccg.auto"; testAutoFile = ""; testPargFile = home + "data/final/devel.gccg.pargx"; testConllFile = home + "data/final/devel.accg.conllx"; outAutoFile = home + "models/out1.txt"; outPargFile = home + "models/out2.txt"; modelFile = home + "models/nnccg.model.txt.gz"; embedFile = "/home/ambati/ilcc/tools/neural-networks/embeddings/turian/embeddings.raw"; if (args.length == 0) { args = new String[] { "-trainCoNLL", trainConllFile, "-trainAuto", trainAutoFile, "-trainParg", trainPargFile, "-testCoNLL", testConllFile, "-testAuto", testAutoFile, "-testParg", testPargFile, "-outParg", outPargFile, "-model", modelFile, "-beam", "1", "-embedFile", embedFile, "-maxIter", "1", "-lookAhead", "" + false //"-isTrain", "true", "-beam", 1, "-debug", "false", "-early", false };//from w ww. java 2 s . 
c om } Properties props = StringUtils.argsToProperties(args, numArgs); IncNNParser incnnpar = new IncNNParser(props); long start; System.err.println("Started Training: " + new Date(System.currentTimeMillis()) + "\n"); start = (long) (System.currentTimeMillis()); if (props.getProperty("trainCoNLL") != null) incnnpar.train(props.getProperty("trainCoNLL"), props.getProperty("testCoNLL"), props.getProperty("model"), props.getProperty("embedFile"), props.getProperty("preModel")); System.err.println("Training Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)"); System.err.println("Loading Model: " + new Date(System.currentTimeMillis()) + "\n"); incnnpar.loadModelFile(props.getProperty("model")); System.err.println("Started Parsing: " + new Date(System.currentTimeMillis()) + "\n"); start = (long) (System.currentTimeMillis()); incnnpar.test(props.getProperty("testCoNLL"), props.getProperty("testAuto"), props.getProperty("testParg")); System.err.println("Parsing Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)"); }
From source file:ilcc.ccgparser.nnparser.IncNNParserGreedy.java
public static void main(String[] args) throws IOException, Exception { String trainAutoFile, trainConllFile, trainPargFile, testAutoFile, testPargFile, testConllFile, outAutoFile, outPargFile, modelFile, embedFile; String home = "/home/ambati/ilcc/projects/parsing/experiments/english/ccg/"; trainAutoFile = home + "data/final/train.gccg.auto"; trainConllFile = home + "data/final/train.accg.conll2"; trainPargFile = home + "data/final/train.gccg.parg"; trainAutoFile = home + "data/final/devel.gccg.auto"; trainConllFile = home + "data/final/devel.accg.conll"; trainPargFile = home + "data/final/devel.gccg.parg"; //testAutoFile = home+"data/final/devel.gccg.auto"; testAutoFile = ""; testPargFile = home + "data/final/devel.gccg.pargx"; testConllFile = home + "data/final/devel.accg.conllx"; outAutoFile = home + "models/out1.txt"; outPargFile = home + "models/out2.txt"; modelFile = home + "models/nnccg.model.txt.gz"; embedFile = "/home/ambati/ilcc/tools/neural-networks/embeddings/turian/embeddings.raw"; if (args.length == 0) { args = new String[] { "-trainCoNLL", trainConllFile, "-trainAuto", trainAutoFile, "-trainParg", trainPargFile, "-testCoNLL", testConllFile, "-testAuto", testAutoFile, "-testParg", testPargFile, "-outParg", outPargFile, "-model", modelFile, "-beam", "1", "-embedFile", embedFile, "-maxIter", "1", //"-isTrain", "true", "-beam", 1, "-debug", "false", "-early", false };/*from w w w . 
j a v a 2 s .co m*/ } Properties props = StringUtils.argsToProperties(args, numArgs); IncNNParserGreedy incnnpar = new IncNNParserGreedy(props); long start; System.err.println("Started Training: " + new Date(System.currentTimeMillis()) + "\n"); start = (long) (System.currentTimeMillis()); if (props.getProperty("trainCoNLL") != null) incnnpar.train(props.getProperty("trainCoNLL"), props.getProperty("testCoNLL"), props.getProperty("model"), props.getProperty("embedFile"), props.getProperty("preModel")); System.err.println("Training Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)"); System.err.println("Loading Model: " + new Date(System.currentTimeMillis()) + "\n"); incnnpar.loadModelFile(props.getProperty("model")); System.err.println("Started Parsing: " + new Date(System.currentTimeMillis()) + "\n"); start = (long) (System.currentTimeMillis()); incnnpar.test(props.getProperty("testCoNLL"), props.getProperty("testAuto"), props.getProperty("testParg")); System.err.println("Parsing Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)"); }
From source file:ilcc.ccgparser.test.IncExtractProb.java
public static void main(String[] args) throws IOException, Exception { String testAutoFile, testConllFile, outFile, modelFile, embedFile; String home = "/home/ambati/ilcc/projects/parsing/experiments/english/ccg/useful/ccg-all/"; modelFile = home + "models/nn.gincint.model.txt.gz"; embedFile = "/home/ambati/ilcc/tools/neural-networks/embeddings/turian/embeddings.raw"; testAutoFile = home + "/devel.per.b16incin.auto"; testConllFile = home + "../../data/final/devel.innccg.conll"; outFile = home + "/out1.txt"; String eeg = "/home/ambati/ilcc/projects/parsing/experiments/eeg/data/"; testAutoFile = eeg + "inc/wsj.eeg.inninc.auto"; testConllFile = eeg + "inc/wsj.eeg.conll"; outFile = eeg + "inc/wsj.eeg.inninc.out.txt"; testAutoFile = eeg + "oracle/wsj.eeg.out.auto"; testConllFile = eeg + "oracle/wsj.eeg.inc.conll"; outFile = eeg + "oracle/wsj.eeg.out.allacts.txt"; testConllFile = eeg + "oracle/wsj.eeg.conll"; outFile = eeg + "oracle/wsj.eeg.out.txt"; if (args.length == 0) { args = new String[] { "-testAuto", testAutoFile, "-testCoNLL", testConllFile, "-outFile", outFile, "-model", modelFile, "-embedFile", embedFile, }; }//from w ww . ja v a 2 s .c o m Properties props = StringUtils.argsToProperties(args, numArgs); IncExtractProb incnnpar = new IncExtractProb(props); long start; System.err.println("Loading Model: " + new Date(System.currentTimeMillis()) + "\n"); incnnpar.loadModelFile(props.getProperty("model")); System.err.println("Started Parsing: " + new Date(System.currentTimeMillis()) + "\n"); start = (long) (System.currentTimeMillis()); incnnpar.test(props.getProperty("testCoNLL"), props.getProperty("testAuto"), props.getProperty("outFile")); System.err.println("Parsing Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)"); }
From source file:ilcc.ccgparser.test.IncGetGoldDerivation.java
public static void main(String[] args) throws IOException, Exception { String trainAutoFile, trainConllFile, trainPargFile, testAutoFile, testPargFile, testConllFile, outAutoFile, outPargFile, modelFile, algo; String eeg = "/home/ambati/ilcc/projects/parsing/experiments/eeg/data/"; trainAutoFile = eeg + "oracle/wsj.eeg.auto"; trainConllFile = eeg + "oracle/wsj.eeg.conll"; outAutoFile = eeg + "oracle/wsj.eeg.out.auto"; if (args.length == 0) { args = new String[] { "-trainCoNLL", trainConllFile, "-trainAuto", trainAutoFile, "-outAuto", outAutoFile, };//from www. ja va2 s . com } Properties props = StringUtils.argsToProperties(args, numArgs); IncParser parser; parser = new IncParserGreedy(props); BufferedWriter out = new BufferedWriter(new FileWriter(new File(props.getProperty("outAuto")))); if (props.getProperty("trainCoNLL") != null) { List<CCGJTreeNode> trees = parser.fillData(); for (int i = 0; i < trees.size(); i++) { CCGJTreeNode tree = trees.get(i); if (tree != null) writeDeriv(i + 1, out, tree); else out.write("ID=" + (i + 1) + "\nError:No Derivation\n"); } } out.close(); }
From source file:process.PTBTokenizer.java
License:Open Source License
/** * Reads files given as arguments and print their tokens, by default as one * per line. This is useful either for testing or to run standalone to turn * a corpus into a one-token-per-line file of tokens. This main method * assumes that the input file is in utf-8 encoding, unless an encoding is * specified.//w w w . j a v a 2 s . com * <p/> * Usage: <code> * java edu.stanford.nlp.process.PTBTokenizer [options] filename+ * </code> * <p/> * Options: * <ul> * <li>-options options Set various tokenization options (see the * documentation in the class javadoc) * <li>-preserveLines Produce space-separated tokens, except when the * original had a line break, not one-token-per-line * <li>-encoding encoding Specifies a character encoding. If you do not * specify one, the default is utf-8 (not the platform default). * <li>-lowerCase Lowercase all tokens (on tokenization) * <li>-parseInside regex Names an XML-style element or a regular expression * over such elements. The tokenizer will only tokenize inside elements that * match this regex. (This is done by regex matching, not an XML parser, but * works well for simple XML documents, or other SGML-style documents, such * as Linguistic Data Consortium releases, which adopt the convention that a * line of a file is either XML markup or character data but never both.) * <li>-ioFileList file* The remaining command-line arguments are treated as * filenames that themselves contain lists of pairs of input-output * filenames (2 column, whitespace separated). 
* <li>-dump Print the whole of each CoreLabel, not just the value (word) * <li>-untok Heuristically untokenize tokenized text * <li>-h, -help Print usage info * </ul> * * @param args * Command line arguments * @throws IOException * If any file I/O problem */ public static void main(String[] args) throws IOException { edu.stanford.nlp.process.PTBTokenizer<HasWord> abctesTokenizer; Properties options = StringUtils.argsToProperties(args, optionArgDefs()); boolean showHelp = PropertiesUtils.getBool(options, "help", false); showHelp = PropertiesUtils.getBool(options, "h", showHelp); if (showHelp) { System.err.println("Usage: java edu.stanford.nlp.process.PTBTokenizer [options]* filename*"); System.err.println( " options: -h|-preserveLines|-lowerCase|-dump|-ioFileList|-encoding|-parseInside|-options"); System.exit(0); } StringBuilder optionsSB = new StringBuilder(); String tokenizerOptions = options.getProperty("options", null); if (tokenizerOptions != null) { optionsSB.append(tokenizerOptions); } boolean preserveLines = PropertiesUtils.getBool(options, "preserveLines", false); if (preserveLines) { optionsSB.append(",tokenizeNLs"); } boolean inputOutputFileList = PropertiesUtils.getBool(options, "ioFileList", false); boolean lowerCase = PropertiesUtils.getBool(options, "lowerCase", false); boolean dump = PropertiesUtils.getBool(options, "dump", false); boolean untok = PropertiesUtils.getBool(options, "untok", false); String charset = options.getProperty("encoding", "utf-8"); String parseInsideKey = options.getProperty("parseInside", null); Pattern parseInsidePattern = null; if (parseInsideKey != null) { try { parseInsidePattern = Pattern.compile("<(/?)(?:" + parseInsideKey + ")(?:\\s[^>]*?)?>"); } catch (PatternSyntaxException e) { // just go with null parseInsidePattern } } // Other arguments are filenames String parsedArgStr = options.getProperty("", null); String[] parsedArgs = (parsedArgStr == null) ? 
null : parsedArgStr.split("\\s+"); ArrayList<String> inputFileList = new ArrayList<String>(); ArrayList<String> outputFileList = null; if (inputOutputFileList && parsedArgs != null) { outputFileList = new ArrayList<String>(); for (String fileName : parsedArgs) { BufferedReader r = IOUtils.readerFromString(fileName, charset); for (String inLine; (inLine = r.readLine()) != null;) { String[] fields = inLine.split("\\s+"); inputFileList.add(fields[0]); if (fields.length > 1) { outputFileList.add(fields[1]); } else { outputFileList.add(fields[0] + ".tok"); } } r.close(); } } else if (parsedArgs != null) { // Concatenate input files into a single output file inputFileList.addAll(Arrays.asList(parsedArgs)); } if (untok) { untok(inputFileList, outputFileList, charset); } else { tok(inputFileList, outputFileList, charset, parseInsidePattern, optionsSB.toString(), preserveLines, dump, lowerCase); } }