Example usage for edu.stanford.nlp.util StringUtils argsToProperties

List of usage examples for edu.stanford.nlp.util StringUtils argsToProperties

Introduction

On this page you can find example usages of edu.stanford.nlp.util.StringUtils.argsToProperties.

Prototype

public static Properties argsToProperties(String[] args, Map<String, Integer> flagsToNumArgs) 

Source Link

Document

Analogous to argsToMap, but returns the parsed arguments as a Properties object.

Usage

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Command-line entry point: runs training when a {@code trainCoNLL} property is present,
 * then loads the model named by the {@code model} property and parses the test data,
 * reporting wall-clock timings on standard error.
 *
 * <p>If {@code args} is empty, a built-in argument list with hard-coded absolute paths is
 * used instead.
 *
 * @param args command-line flags; parsed with {@code StringUtils.argsToProperties} using
 *             the {@code numArgs} arity map
 * @throws IOException declared by the original signature
 * @throws Exception   declared by the original signature
 */
public static void main(String[] args) throws IOException, Exception {

    if (args.length == 0) {
        // No command line given: fall back to the built-in defaults. They are only
        // assembled here because nothing else in this method reads them.
        String home = "/home/ambati/ilcc/projects/parsing/experiments/english/ccg/";

        // NOTE(review): the training inputs point at the devel split. The train.* paths
        // that used to be assigned first were overwritten before use, so devel.* is what
        // has always taken effect -- confirm which split is intended.
        String trainAutoFile = home + "data/final/devel.gccg.auto";
        String trainConllFile = home + "data/final/devel.accg.conll";
        String trainPargFile = home + "data/final/devel.gccg.parg";

        String testAutoFile = "";
        String testPargFile = home + "data/final/devel.gccg.pargx";
        String testConllFile = home + "data/final/devel.accg.conllx";
        String outPargFile = home + "models/out2.txt";
        String modelFile = home + "models/nnccg.model.txt.gz";
        String embedFile = "/home/ambati/ilcc/tools/neural-networks/embeddings/turian/embeddings.raw";

        args = new String[] { "-trainCoNLL", trainConllFile, "-trainAuto", trainAutoFile, "-trainParg",
                trainPargFile, "-testCoNLL", testConllFile, "-testAuto", testAutoFile, "-testParg",
                testPargFile, "-outParg", outPargFile, "-model", modelFile, "-beam", "1", "-embedFile",
                embedFile, "-maxIter", "1", "-lookAhead", "false" };
    }

    Properties props = StringUtils.argsToProperties(args, numArgs);
    IncNNParser incnnpar = new IncNNParser(props);

    // The training banner and timing are printed even when training is skipped.
    System.err.println("Started Training: " + new Date(System.currentTimeMillis()) + "\n");
    long start = System.currentTimeMillis();
    if (props.getProperty("trainCoNLL") != null) {
        incnnpar.train(props.getProperty("trainCoNLL"), props.getProperty("testCoNLL"),
                props.getProperty("model"), props.getProperty("embedFile"), props.getProperty("preModel"));
    }
    System.err.println("Training Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)");

    System.err.println("Loading Model: " + new Date(System.currentTimeMillis()) + "\n");
    incnnpar.loadModelFile(props.getProperty("model"));

    System.err.println("Started Parsing: " + new Date(System.currentTimeMillis()) + "\n");
    start = System.currentTimeMillis();
    incnnpar.test(props.getProperty("testCoNLL"), props.getProperty("testAuto"), props.getProperty("testParg"));
    System.err.println("Parsing Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)");
}

From source file:ilcc.ccgparser.nnparser.IncNNParserGreedy.java

/**
 * Command-line entry point: runs training when a {@code trainCoNLL} property is present,
 * then loads the model named by the {@code model} property and parses the test data,
 * reporting wall-clock timings on standard error.
 *
 * <p>If {@code args} is empty, a built-in argument list with hard-coded absolute paths is
 * used instead.
 *
 * @param args command-line flags; parsed with {@code StringUtils.argsToProperties} using
 *             the {@code numArgs} arity map
 * @throws IOException declared by the original signature
 * @throws Exception   declared by the original signature
 */
public static void main(String[] args) throws IOException, Exception {

    if (args.length == 0) {
        // No command line given: fall back to the built-in defaults. They are only
        // assembled here because nothing else in this method reads them.
        String home = "/home/ambati/ilcc/projects/parsing/experiments/english/ccg/";

        // NOTE(review): the training inputs point at the devel split. The train.* paths
        // that used to be assigned first were overwritten before use, so devel.* is what
        // has always taken effect -- confirm which split is intended.
        String trainAutoFile = home + "data/final/devel.gccg.auto";
        String trainConllFile = home + "data/final/devel.accg.conll";
        String trainPargFile = home + "data/final/devel.gccg.parg";

        String testAutoFile = "";
        String testPargFile = home + "data/final/devel.gccg.pargx";
        String testConllFile = home + "data/final/devel.accg.conllx";
        String outPargFile = home + "models/out2.txt";
        String modelFile = home + "models/nnccg.model.txt.gz";
        String embedFile = "/home/ambati/ilcc/tools/neural-networks/embeddings/turian/embeddings.raw";

        args = new String[] { "-trainCoNLL", trainConllFile, "-trainAuto", trainAutoFile, "-trainParg",
                trainPargFile, "-testCoNLL", testConllFile, "-testAuto", testAutoFile, "-testParg",
                testPargFile, "-outParg", outPargFile, "-model", modelFile, "-beam", "1", "-embedFile",
                embedFile, "-maxIter", "1" };
    }

    Properties props = StringUtils.argsToProperties(args, numArgs);
    IncNNParserGreedy incnnpar = new IncNNParserGreedy(props);

    // The training banner and timing are printed even when training is skipped.
    System.err.println("Started Training: " + new Date(System.currentTimeMillis()) + "\n");
    long start = System.currentTimeMillis();
    if (props.getProperty("trainCoNLL") != null) {
        incnnpar.train(props.getProperty("trainCoNLL"), props.getProperty("testCoNLL"),
                props.getProperty("model"), props.getProperty("embedFile"), props.getProperty("preModel"));
    }
    System.err.println("Training Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)");

    System.err.println("Loading Model: " + new Date(System.currentTimeMillis()) + "\n");
    incnnpar.loadModelFile(props.getProperty("model"));

    System.err.println("Started Parsing: " + new Date(System.currentTimeMillis()) + "\n");
    start = System.currentTimeMillis();
    incnnpar.test(props.getProperty("testCoNLL"), props.getProperty("testAuto"), props.getProperty("testParg"));
    System.err.println("Parsing Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)");
}

From source file:ilcc.ccgparser.test.IncExtractProb.java

/**
 * Command-line entry point: loads the model named by the {@code model} property and runs
 * {@code test} on the {@code testCoNLL} / {@code testAuto} inputs, passing the
 * {@code outFile} property as the third argument. Progress and wall-clock parsing time are
 * written to standard error.
 *
 * <p>If {@code args} is empty, a built-in argument list with hard-coded absolute paths is
 * used instead.
 *
 * @param args command-line flags; parsed with {@code StringUtils.argsToProperties} using
 *             the {@code numArgs} arity map
 * @throws IOException declared by the original signature
 * @throws Exception   declared by the original signature
 */
public static void main(String[] args) throws IOException, Exception {

    if (args.length == 0) {
        // No command line given: fall back to the built-in defaults.
        String home = "/home/ambati/ilcc/projects/parsing/experiments/english/ccg/useful/ccg-all/";
        String eeg = "/home/ambati/ilcc/projects/parsing/experiments/eeg/data/";

        String modelFile = home + "models/nn.gincint.model.txt.gz";
        String embedFile = "/home/ambati/ilcc/tools/neural-networks/embeddings/turian/embeddings.raw";

        // NOTE(review): several alternative input/output sets used to be assigned to these
        // variables one after another; only the last assignment of each ever took effect,
        // and those are the values kept here -- confirm this is the intended default set.
        String testAutoFile = eeg + "oracle/wsj.eeg.out.auto";
        String testConllFile = eeg + "oracle/wsj.eeg.conll";
        String outFile = eeg + "oracle/wsj.eeg.out.txt";

        args = new String[] { "-testAuto", testAutoFile, "-testCoNLL", testConllFile, "-outFile", outFile,
                "-model", modelFile, "-embedFile", embedFile };
    }

    Properties props = StringUtils.argsToProperties(args, numArgs);
    IncExtractProb incnnpar = new IncExtractProb(props);

    System.err.println("Loading Model: " + new Date(System.currentTimeMillis()) + "\n");
    incnnpar.loadModelFile(props.getProperty("model"));

    System.err.println("Started Parsing: " + new Date(System.currentTimeMillis()) + "\n");
    long start = System.currentTimeMillis();
    incnnpar.test(props.getProperty("testCoNLL"), props.getProperty("testAuto"), props.getProperty("outFile"));
    System.err.println("Parsing Time: " + (System.currentTimeMillis() - start) / 1000.0 + " (s)");
}

From source file:ilcc.ccgparser.test.IncGetGoldDerivation.java

/**
 * Command-line entry point: builds an {@code IncParserGreedy} from the parsed options and
 * writes one entry per tree returned by {@code fillData()} to the file named by the
 * {@code outAuto} property. Trees that are {@code null} are recorded as
 * {@code "Error:No Derivation"} entries. IDs are 1-based.
 *
 * <p>If {@code args} is empty, a built-in argument list with hard-coded absolute paths is
 * used instead. The output file is created even when no {@code trainCoNLL} property is
 * present; in that case nothing is written to it.
 *
 * @param args command-line flags; parsed with {@code StringUtils.argsToProperties} using
 *             the {@code numArgs} arity map
 * @throws IOException              if the output file cannot be opened or written
 * @throws IllegalArgumentException if no {@code -outAuto} option is supplied
 * @throws Exception                declared by the original signature
 */
public static void main(String[] args) throws IOException, Exception {

    if (args.length == 0) {
        // No command line given: fall back to the built-in defaults.
        String eeg = "/home/ambati/ilcc/projects/parsing/experiments/eeg/data/";
        String trainAutoFile = eeg + "oracle/wsj.eeg.auto";
        String trainConllFile = eeg + "oracle/wsj.eeg.conll";
        String outAutoFile = eeg + "oracle/wsj.eeg.out.auto";

        args = new String[] { "-trainCoNLL", trainConllFile, "-trainAuto", trainAutoFile, "-outAuto",
                outAutoFile };
    }

    Properties props = StringUtils.argsToProperties(args, numArgs);

    // Fail fast with a readable message instead of a bare NullPointerException from File.
    String outAutoPath = props.getProperty("outAuto");
    if (outAutoPath == null) {
        throw new IllegalArgumentException("Missing required option: -outAuto <file>");
    }

    IncParser parser = new IncParserGreedy(props);

    // try-with-resources: the writer is flushed and closed even if fillData/writeDeriv throws.
    try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(outAutoPath)))) {
        if (props.getProperty("trainCoNLL") != null) {
            List<CCGJTreeNode> trees = parser.fillData();
            for (int i = 0; i < trees.size(); i++) {
                CCGJTreeNode tree = trees.get(i);
                if (tree != null) {
                    writeDeriv(i + 1, out, tree);
                } else {
                    out.write("ID=" + (i + 1) + "\nError:No Derivation\n");
                }
            }
        }
    }
}

From source file:process.PTBTokenizer.java

License: Open Source License

/**
 * Reads files given as arguments and prints their tokens, by default as one
 * per line. This is useful either for testing or to run standalone to turn
 * a corpus into a one-token-per-line file of tokens. This main method
 * assumes that the input file is in utf-8 encoding, unless an encoding is
 * specified.
 * <p/>
 * Usage: <code>
 * java edu.stanford.nlp.process.PTBTokenizer [options] filename+
 * </code>
 * <p/>
 * Options:
 * <ul>
 * <li>-options options Set various tokenization options (see the
 * documentation in the class javadoc)
 * <li>-preserveLines Produce space-separated tokens, except when the
 * original had a line break, not one-token-per-line
 * <li>-encoding encoding Specifies a character encoding. If you do not
 * specify one, the default is utf-8 (not the platform default).
 * <li>-lowerCase Lowercase all tokens (on tokenization)
 * <li>-parseInside regex Names an XML-style element or a regular expression
 * over such elements. The tokenizer will only tokenize inside elements that
 * match this regex. (This is done by regex matching, not an XML parser, but
 * works well for simple XML documents, or other SGML-style documents, such
 * as Linguistic Data Consortium releases, which adopt the convention that a
 * line of a file is either XML markup or character data but never both.)
 * If the regex is not valid, a warning is printed to standard error and the
 * option is ignored.
 * <li>-ioFileList file* The remaining command-line arguments are treated as
 * filenames that themselves contain lists of pairs of input-output
 * filenames (2 column, whitespace separated). Blank lines are skipped; a
 * line with only one column gets the output name <i>input</i>.tok.
 * <li>-dump Print the whole of each CoreLabel, not just the value (word)
 * <li>-untok Heuristically untokenize tokenized text
 * <li>-h, -help Print usage info
 * </ul>
 * 
 * @param args
 *            Command line arguments
 * @throws IOException
 *             If any file I/O problem
 */
public static void main(String[] args) throws IOException {

    Properties options = StringUtils.argsToProperties(args, optionArgDefs());
    boolean showHelp = PropertiesUtils.getBool(options, "help", false);
    showHelp = PropertiesUtils.getBool(options, "h", showHelp);
    if (showHelp) {
        System.err.println("Usage: java edu.stanford.nlp.process.PTBTokenizer [options]* filename*");
        System.err.println(
                "  options: -h|-preserveLines|-lowerCase|-dump|-untok|-ioFileList|-encoding|-parseInside|-options");
        System.exit(0);
    }

    StringBuilder optionsSB = new StringBuilder();
    String tokenizerOptions = options.getProperty("options", null);
    if (tokenizerOptions != null) {
        optionsSB.append(tokenizerOptions);
    }
    boolean preserveLines = PropertiesUtils.getBool(options, "preserveLines", false);
    if (preserveLines) {
        optionsSB.append(",tokenizeNLs");
    }
    boolean inputOutputFileList = PropertiesUtils.getBool(options, "ioFileList", false);
    boolean lowerCase = PropertiesUtils.getBool(options, "lowerCase", false);
    boolean dump = PropertiesUtils.getBool(options, "dump", false);
    boolean untok = PropertiesUtils.getBool(options, "untok", false);
    String charset = options.getProperty("encoding", "utf-8");
    String parseInsideKey = options.getProperty("parseInside", null);
    Pattern parseInsidePattern = null;
    if (parseInsideKey != null) {
        try {
            // Matches an opening or closing tag of the requested element(s), with optional attributes.
            parseInsidePattern = Pattern.compile("<(/?)(?:" + parseInsideKey + ")(?:\\s[^>]*?)?>");
        } catch (PatternSyntaxException e) {
            // Best effort: carry on with a null parseInsidePattern, but do not hide the
            // problem from the user.
            System.err.println("Warning: ignoring invalid -parseInside regex \"" + parseInsideKey + "\": "
                    + e.getMessage());
        }
    }

    // Other arguments are filenames; they are stored space-joined under the empty key
    String parsedArgStr = options.getProperty("", null);
    String[] parsedArgs = (parsedArgStr == null) ? null : parsedArgStr.split("\\s+");

    ArrayList<String> inputFileList = new ArrayList<String>();
    ArrayList<String> outputFileList = null;
    if (inputOutputFileList && parsedArgs != null) {
        outputFileList = new ArrayList<String>();
        for (String fileName : parsedArgs) {
            BufferedReader r = IOUtils.readerFromString(fileName, charset);
            try {
                for (String inLine; (inLine = r.readLine()) != null;) {
                    // Trim first so leading whitespace cannot yield an empty first column,
                    // and skip blank lines instead of registering an empty file name.
                    String line = inLine.trim();
                    if (line.isEmpty()) {
                        continue;
                    }
                    String[] fields = line.split("\\s+");
                    inputFileList.add(fields[0]);
                    if (fields.length > 1) {
                        outputFileList.add(fields[1]);
                    } else {
                        outputFileList.add(fields[0] + ".tok");
                    }
                }
            } finally {
                // Release the list file even when reading it fails part-way through
                r.close();
            }
        }
    } else if (parsedArgs != null) {
        // Concatenate input files into a single output file
        inputFileList.addAll(Arrays.asList(parsedArgs));
    }

    if (untok) {
        untok(inputFileList, outputFileList, charset);
    } else {
        tok(inputFileList, outputFileList, charset, parseInsidePattern, optionsSB.toString(), preserveLines,
                dump, lowerCase);
    }
}