List of usage examples for edu.stanford.nlp.util PropertiesUtils getBool
public static boolean getBool(Properties props, String key, boolean defaultValue)
From source file:ilcc.ccgparser.incderivation.IncParser.java
/**
 * Initialises the parser from {@code props}: reads corpus/model file paths,
 * training and decoding hyper-parameters, and selects the shift-reduce
 * algorithm implementation named by the "algo" property.
 *
 * @param props runtime configuration properties
 * @throws IOException declared for subclass/implementation use during setup
 */
protected void init(Properties props) throws IOException {
    model = new AvePerceptron();
    sSize = 0;
    goldDetails = new HashMap<>();
    actsMap = new HashMap<>();

    // Corpus and model file locations.
    trainAutoFile = props.getProperty("trainAuto");
    trainCoNLLFile = props.getProperty("trainCoNLL");
    trainPargFile = props.getProperty("trainParg");
    testAutoFile = props.getProperty("testAuto");
    testPargFile = props.getProperty("testParg");
    testCoNLLFile = props.getProperty("testCoNLL");
    outAutoFile = props.getProperty("outAuto");
    outPargFile = props.getProperty("outParg");
    modelFile = props.getProperty("model");

    // Hyper-parameters, with defaults applied when a key is absent.
    isTrain = PropertiesUtils.getBool(props, "isTrain", false);
    beamSize = PropertiesUtils.getInt(props, "beam", 1);
    iters = PropertiesUtils.getInt(props, "iters", 10);
    early_update = PropertiesUtils.getBool(props, "early", false);
    lookAhead = PropertiesUtils.getBool(props, "lookAhead", true);
    debug = PropertiesUtils.getInt(props, "debug", 0);

    // "RevInc" selects the revised incremental algorithm; anything else → NonInc.
    incalgo = props.getProperty("algo").equals("RevInc");
    if (incalgo) {
        srparser = new RevInc();
    } else {
        srparser = new NonInc();
    }
    srparser.incalgo = incalgo;

    Commons.setDebug(debug);
    Commons.setIncAlgo(incalgo);
}
From source file:ilcc.ccgparser.nnparser.Config.java
private void setProperties(Properties props) { trainingThreads = PropertiesUtils.getInt(props, "trainingThreads", trainingThreads); wordCutOff = PropertiesUtils.getInt(props, "wordCutOff", wordCutOff); initRange = PropertiesUtils.getDouble(props, "initRange", initRange); maxIter = PropertiesUtils.getInt(props, "maxIter", maxIter); batchSize = PropertiesUtils.getInt(props, "batchSize", batchSize); adaEps = PropertiesUtils.getDouble(props, "adaEps", adaEps); adaAlpha = PropertiesUtils.getDouble(props, "adaAlpha", adaAlpha); regParameter = PropertiesUtils.getDouble(props, "regParameter", regParameter); dropProb = PropertiesUtils.getDouble(props, "dropProb", dropProb); hiddenSize = PropertiesUtils.getInt(props, "hiddenSize", hiddenSize); embeddingSize = PropertiesUtils.getInt(props, "embeddingSize", embeddingSize); numPreComputed = PropertiesUtils.getInt(props, "numPreComputed", numPreComputed); evalPerIter = PropertiesUtils.getInt(props, "evalPerIter", evalPerIter); clearGradientsPerIter = PropertiesUtils.getInt(props, "clearGradientsPerIter", clearGradientsPerIter); saveIntermediate = PropertiesUtils.getBool(props, "saveIntermediate", saveIntermediate); unlabeled = PropertiesUtils.getBool(props, "unlabeled", unlabeled); cPOS = PropertiesUtils.getBool(props, "cPOS", cPOS); noPunc = PropertiesUtils.getBool(props, "noPunc", noPunc); lookAhead = PropertiesUtils.getBool(props, "lookAhead", false); numTokens = (lookAhead) ? 39 : 33;// w w w . jav a 2 s . c om }
From source file:ilcc.ccgparser.test.IncExtractProb.java
/**
 * Constructs an extractor configured from {@code props}: test corpus paths,
 * output and model file locations, and the look-ahead flag. Always uses the
 * revised incremental (RevInc) shift-reduce parser.
 *
 * @param props runtime configuration (keys: testAuto, testCoNLL, outFile,
 *              model, lookAhead)
 * @throws IOException on I/O failure during initialisation
 */
public IncExtractProb(Properties props) throws IOException {
    config = new Config(props);
    model = new AvePerceptron();
    sSize = 0;

    // File locations from the property set.
    testAutoFile = props.getProperty("testAuto");
    testCoNLLFile = props.getProperty("testCoNLL");
    outFile = props.getProperty("outFile");
    modelFile = props.getProperty("model");
    lookAhead = PropertiesUtils.getBool(props, "lookAhead", false);

    // This extractor is hard-wired to the incremental algorithm.
    srparser = new RevInc();
    srparser.incalgo = true;
}
From source file:process.PTBTokenizer.java
License:Open Source License
/** * Reads files given as arguments and print their tokens, by default as one * per line. This is useful either for testing or to run standalone to turn * a corpus into a one-token-per-line file of tokens. This main method * assumes that the input file is in utf-8 encoding, unless an encoding is * specified./*from ww w . j a v a 2s . co m*/ * <p/> * Usage: <code> * java edu.stanford.nlp.process.PTBTokenizer [options] filename+ * </code> * <p/> * Options: * <ul> * <li>-options options Set various tokenization options (see the * documentation in the class javadoc) * <li>-preserveLines Produce space-separated tokens, except when the * original had a line break, not one-token-per-line * <li>-encoding encoding Specifies a character encoding. If you do not * specify one, the default is utf-8 (not the platform default). * <li>-lowerCase Lowercase all tokens (on tokenization) * <li>-parseInside regex Names an XML-style element or a regular expression * over such elements. The tokenizer will only tokenize inside elements that * match this regex. (This is done by regex matching, not an XML parser, but * works well for simple XML documents, or other SGML-style documents, such * as Linguistic Data Consortium releases, which adopt the convention that a * line of a file is either XML markup or character data but never both.) * <li>-ioFileList file* The remaining command-line arguments are treated as * filenames that themselves contain lists of pairs of input-output * filenames (2 column, whitespace separated). 
* <li>-dump Print the whole of each CoreLabel, not just the value (word) * <li>-untok Heuristically untokenize tokenized text * <li>-h, -help Print usage info * </ul> * * @param args * Command line arguments * @throws IOException * If any file I/O problem */ public static void main(String[] args) throws IOException { edu.stanford.nlp.process.PTBTokenizer<HasWord> abctesTokenizer; Properties options = StringUtils.argsToProperties(args, optionArgDefs()); boolean showHelp = PropertiesUtils.getBool(options, "help", false); showHelp = PropertiesUtils.getBool(options, "h", showHelp); if (showHelp) { System.err.println("Usage: java edu.stanford.nlp.process.PTBTokenizer [options]* filename*"); System.err.println( " options: -h|-preserveLines|-lowerCase|-dump|-ioFileList|-encoding|-parseInside|-options"); System.exit(0); } StringBuilder optionsSB = new StringBuilder(); String tokenizerOptions = options.getProperty("options", null); if (tokenizerOptions != null) { optionsSB.append(tokenizerOptions); } boolean preserveLines = PropertiesUtils.getBool(options, "preserveLines", false); if (preserveLines) { optionsSB.append(",tokenizeNLs"); } boolean inputOutputFileList = PropertiesUtils.getBool(options, "ioFileList", false); boolean lowerCase = PropertiesUtils.getBool(options, "lowerCase", false); boolean dump = PropertiesUtils.getBool(options, "dump", false); boolean untok = PropertiesUtils.getBool(options, "untok", false); String charset = options.getProperty("encoding", "utf-8"); String parseInsideKey = options.getProperty("parseInside", null); Pattern parseInsidePattern = null; if (parseInsideKey != null) { try { parseInsidePattern = Pattern.compile("<(/?)(?:" + parseInsideKey + ")(?:\\s[^>]*?)?>"); } catch (PatternSyntaxException e) { // just go with null parseInsidePattern } } // Other arguments are filenames String parsedArgStr = options.getProperty("", null); String[] parsedArgs = (parsedArgStr == null) ? 
null : parsedArgStr.split("\\s+"); ArrayList<String> inputFileList = new ArrayList<String>(); ArrayList<String> outputFileList = null; if (inputOutputFileList && parsedArgs != null) { outputFileList = new ArrayList<String>(); for (String fileName : parsedArgs) { BufferedReader r = IOUtils.readerFromString(fileName, charset); for (String inLine; (inLine = r.readLine()) != null;) { String[] fields = inLine.split("\\s+"); inputFileList.add(fields[0]); if (fields.length > 1) { outputFileList.add(fields[1]); } else { outputFileList.add(fields[0] + ".tok"); } } r.close(); } } else if (parsedArgs != null) { // Concatenate input files into a single output file inputFileList.addAll(Arrays.asList(parsedArgs)); } if (untok) { untok(inputFileList, outputFileList, charset); } else { tok(inputFileList, outputFileList, charset, parseInsidePattern, optionsSB.toString(), preserveLines, dump, lowerCase); } }