List of usage examples for the edu.stanford.nlp.util.Timing class
public Timing()
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private void loadModelFile(String modelFile, boolean verbose) throws IOException { Timing t = new Timing(); try {//from ww w. j a va2 s. co m System.err.println("Loading ccg parser model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); actsMap = new HashMap<>(); knownWords = new ArrayList<>(); knownPos = new ArrayList<>(); knownCCGCats = new ArrayList<>(); srparser = new NonInc(); double[][] E = new double[nDict + nPOS + nccgCat][eSize]; String[] splits; int index = 0; for (int k = 0; k < classes; k++) { s = input.readLine().trim(); splits = s.split("--"); actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2], RuleType.valueOf(splits[3])), k); } for (int k = 0; k < nuRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null, 
CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addUnaryRuleInfo(info, key); } } for (int k = 0; k < nbRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addBinaryRuleInfo(info, key); } } for (int k = 0; k < nrRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addRevealRuleInfo(info, key); } } for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); knownWords.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); knownPos.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nccgCat; ++k) { s = input.readLine(); splits = s.split(" "); knownCCGCats.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } generateIDs(); double[][] W1 = new double[hSize][eSize * nTokens]; for (int j = 0; j < W1[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W1.length; ++i) W1[i][j] = Double.parseDouble(splits[i]); } double[] 
b1 = new double[hSize]; s = input.readLine(); splits = s.split(" "); for (int i = 0; i < b1.length; ++i) b1[i] = Double.parseDouble(splits[i]); double[][] W2 = new double[classes][hSize]; for (int j = 0; j < W2[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W2.length; ++i) W2[i][j] = Double.parseDouble(splits[i]); } preComputed = new ArrayList<Integer>(); while (preComputed.size() < nPreComputed) { s = input.readLine(); splits = s.split(" "); for (String split : splits) { preComputed.add(Integer.parseInt(split)); } } input.close(); classifier = new Classifier(config, E, W1, b1, W2, preComputed); } catch (IOException e) { throw new RuntimeIOException(e); } // initialize the loaded parser // Pre-compute matrix multiplications if (config.numPreComputed > 0) { classifier.preCompute(); } t.done("Initializing ccg parser"); }
From source file:ilcc.ccgparser.test.IncExtractProb.java
private void loadModelFile(String modelFile, boolean verbose) throws IOException { Timing t = new Timing(); try {/* www .j av a2 s .co m*/ System.err.println("Loading ccg parser model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); actsMap = new HashMap<>(); knownWords = new ArrayList<>(); knownPos = new ArrayList<>(); knownCCGCats = new ArrayList<>(); double[][] E = new double[nDict + nPOS + nccgCat][eSize]; String[] splits; int index = 0; for (int k = 0; k < classes; k++) { s = input.readLine().trim(); splits = s.split("--"); actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2], RuleType.valueOf(splits[3])), k); } for (int k = 0; k < nuRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null, CCGcat.ccgCatFromString(parts[2]), 
parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addUnaryRuleInfo(info, key); } } for (int k = 0; k < nbRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addBinaryRuleInfo(info, key); } } for (int k = 0; k < nrRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addRevealRuleInfo(info, key); } } for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); knownWords.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); knownPos.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nccgCat; ++k) { s = input.readLine(); splits = s.split(" "); knownCCGCats.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } generateIDs(); double[][] W1 = new double[hSize][eSize * nTokens]; for (int j = 0; j < W1[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W1.length; ++i) W1[i][j] = Double.parseDouble(splits[i]); } double[] b1 = new double[hSize]; s = 
input.readLine(); splits = s.split(" "); for (int i = 0; i < b1.length; ++i) b1[i] = Double.parseDouble(splits[i]); double[][] W2 = new double[classes][hSize]; for (int j = 0; j < W2[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W2.length; ++i) W2[i][j] = Double.parseDouble(splits[i]); } preComputed = new ArrayList<Integer>(); while (preComputed.size() < nPreComputed) { s = input.readLine(); splits = s.split(" "); for (String split : splits) { preComputed.add(Integer.parseInt(split)); } } input.close(); classifier = new Classifier(config, E, W1, b1, W2, preComputed); } catch (IOException e) { throw new RuntimeIOException(e); } // initialize the loaded parser // Pre-compute matrix multiplications if (config.numPreComputed > 0) { classifier.preCompute(); } t.done("Initializing ccg parser"); }
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
/** Parse the files with names given in the String array args elements from * index argIndex on.//from w w w . j a v a 2s .co m */ public ArrayList parseFile(String filename, String content, int startSentence, boolean tokenized, TokenizerFactory tokenizerFactory, List<List<? extends HasWord>> document, DocumentPreprocessor documentPreprocessor, Function<List<HasWord>, List<HasWord>> escaper, int tagDelimiter) { ArrayList treeList = new ArrayList(); PrintWriter pwOut = op.tlpParams.pw(); PrintWriter pwErr = op.tlpParams.pw(System.err); RECKTreePrint treePrint = getRECKTreePrint(op); int numWords = 0; int numSents = 0; int numUnparsable = 0; int numNoMemory = 0; int numFallback = 0; int numSkipped = 0; Timing timer = new Timing(); TreebankLanguagePack tlp = op.tlpParams.treebankLanguagePack(); // set the tokenizer if (tokenized) { tokenizerFactory = WhitespaceTokenizer.factory(); } if (tokenizerFactory == null) { tokenizerFactory = tlp.getTokenizerFactory(); } if (Test.verbose) { System.err.println("parseFiles: Tokenizer factory is: " + tokenizerFactory); System.err.println("Sentence final words are: " + Arrays.asList(tlp.sentenceFinalPunctuationWords())); System.err.println("File encoding is: " + op.tlpParams.getInputEncoding()); } documentPreprocessor.setTokenizerFactory(tokenizerFactory); documentPreprocessor.setSentenceFinalPuncWords(tlp.sentenceFinalPunctuationWords()); documentPreprocessor.setEncoding(op.tlpParams.getInputEncoding()); boolean saidMemMessage = false; // evaluation setup boolean runningAverages = Boolean.parseBoolean(Test.evals.getProperty("runningAverages")); boolean summary = Boolean.parseBoolean(Test.evals.getProperty("summary")); AbstractEval.ScoreEval pcfgLL = null; if (Boolean.parseBoolean(Test.evals.getProperty("pcfgLL"))) { pcfgLL = new AbstractEval.ScoreEval("pcfgLL", runningAverages); } AbstractEval.ScoreEval depLL = null; if (Boolean.parseBoolean(Test.evals.getProperty("depLL"))) { depLL = new AbstractEval.ScoreEval("depLL", 
runningAverages); } AbstractEval.ScoreEval factLL = null; if (Boolean.parseBoolean(Test.evals.getProperty("factLL"))) { factLL = new AbstractEval.ScoreEval("factLL", runningAverages); } /** Hide for performance timer.start(); System.out.println("Parsing file: " + filename + " with " + document.size() + " sentences.");*/ PrintWriter pwo = pwOut; int num = 0, docIndex = startSentence; for (List sentence : document) { // System.out.println(sentence.toString()); num++; numSents++; int len = sentence.size(); numWords += len; Tree ansTree = null; try { if (!parse(sentence)) { pwErr.print("Sentence couldn't be parsed by grammar."); if (pparser != null && pparser.hasParse() && fallbackToPCFG) { pwErr.println("... falling back to PCFG parse."); ansTree = getBestPCFGParse(); numFallback++; } else { pwErr.println(); numUnparsable++; } } else { // System.out.println("Score: " + lp.pparser.bestScore); ansTree = getBestParse(); } if (pcfgLL != null && pparser != null) { pcfgLL.recordScore(pparser, pwErr); } if (depLL != null && dparser != null) { depLL.recordScore(dparser, pwErr); } if (factLL != null && bparser != null) { factLL.recordScore(bparser, pwErr); } } catch (OutOfMemoryError e) { if (Test.maxLength != -0xDEADBEEF) { // this means they explicitly asked for a length they cannot handle. Throw exception. pwErr.println("NOT ENOUGH MEMORY TO PARSE SENTENCES OF LENGTH " + Test.maxLength); pwo.println("NOT ENOUGH MEMORY TO PARSE SENTENCES OF LENGTH " + Test.maxLength); throw e; } else { if (!saidMemMessage) { printOutOfMemory(pwErr); saidMemMessage = true; } if (pparser.hasParse() && fallbackToPCFG) { try { String what = "dependency"; if (dparser.hasParse()) { what = "factored"; } pwErr.println( "Sentence too long for " + what + " parser. Falling back to PCFG parse..."); ansTree = getBestPCFGParse(); numFallback++; } catch (OutOfMemoryError oome) { oome.printStackTrace(); numNoMemory++; pwErr.println("No memory to gather PCFG parse. 
Skipping..."); pwo.println("Sentence skipped: no PCFG fallback."); pparser.nudgeDownArraySize(); } } else { pwErr.println( "Sentence has no parse using PCFG grammar (or no PCFG fallback). Skipping..."); pwo.println("Sentence skipped: no PCFG fallback."); numSkipped++; } } } catch (UnsupportedOperationException uEx) { pwErr.println("Sentence too long (or zero words)."); pwo.println("Sentence skipped: too long (or zero words)."); numWords -= len; numSkipped++; } if (ansTree != null) { computePosition(docIndex, (Sentence) sentence, content); TDs = treePrint.getDependencies(ansTree, reckTreeList, sentencePosition); if (TDs.size() > 0) TDs = treePrint.orderDependencies(TDs, ansTree.getLeaves().size()); RECKDPTreeNodeImpl DPTree = treePrint.convertToDependencyTree(ansTree, reckTreeList, sentencePosition); DPTree = this.splitHyphen_Dependency(DPTree); DPTree = this.splitPoint_Dependency(DPTree); RECKCTTreeNodeImpl CTTree = convertToRECKTree(ansTree, docIndex, content); CTTree = this.splitHyphen_Constituent(CTTree); CTTree = this.splitPoint_Constituent(CTTree); RECKParseTreeImpl rpTree = new RECKParseTreeImpl(sentence, TDs, sentencePosition, DPTree, CTTree); treeList.add(rpTree); } // crude addition of k-best tree printing if (Test.printPCFGkBest > 0 && pparser.hasParse()) { if (ansTree != null) { computePosition(docIndex, (Sentence) sentence, content); TDs = treePrint.getDependencies(ansTree, reckTreeList, sentencePosition); if (TDs.size() > 0) TDs = treePrint.orderDependencies(TDs, ansTree.getLeaves().size()); RECKDPTreeNodeImpl DPTree = treePrint.convertToDependencyTree(ansTree, reckTreeList, sentencePosition); DPTree = this.splitHyphen_Dependency(DPTree); DPTree = this.splitPoint_Dependency(DPTree); RECKCTTreeNodeImpl CTTree = convertToRECKTree(ansTree, docIndex, content); CTTree = this.splitHyphen_Constituent(CTTree); CTTree = this.splitPoint_Constituent(CTTree); RECKParseTreeImpl rpTree = new RECKParseTreeImpl(sentence, TDs, sentencePosition, DPTree, CTTree); 
treeList.add(rpTree); } } else if (Test.printFactoredKGood > 0 && bparser.hasParse()) { // DZ: debug n best trees if (ansTree != null) { computePosition(docIndex, (Sentence) sentence, content); TDs = treePrint.getDependencies(ansTree, reckTreeList, sentencePosition); if (TDs.size() > 0) TDs = treePrint.orderDependencies(TDs, ansTree.getLeaves().size()); RECKDPTreeNodeImpl DPTree = treePrint.convertToDependencyTree(ansTree, reckTreeList, sentencePosition); DPTree = this.splitHyphen_Dependency(DPTree); DPTree = this.splitPoint_Dependency(DPTree); RECKCTTreeNodeImpl CTTree = convertToRECKTree(ansTree, docIndex, content); CTTree = this.splitHyphen_Constituent(CTTree); CTTree = this.splitPoint_Constituent(CTTree); RECKParseTreeImpl rpTree = new RECKParseTreeImpl(sentence, TDs, sentencePosition, DPTree, CTTree); treeList.add(rpTree); } } docIndex = sentencePosition.getEnd().intValue(); } // for sentence : document if (Test.writeOutputFiles) { pwo.close(); } System.out.println("Parsed file: " + filename + " [" + num + " sentences]."); /** Hide for performance long millis = timer.stop(); if (summary) { if (pcfgLL != null) pcfgLL.display(false, pwErr); if (depLL != null) depLL.display(false, pwErr); if (factLL != null) factLL.display(false, pwErr); }*/ if (saidMemMessage) { printOutOfMemory(pwErr); } /** Hide for performance double wordspersec = numWords / (((double) millis) / 1000); double sentspersec = numSents / (((double) millis) / 1000); NumberFormat nf = new DecimalFormat("0.00"); // easier way! 
System.out.println("Parsed " + numWords + " words in " + numSents + " sentences (" + nf.format(wordspersec) + " wds/sec; " + nf.format(sentspersec) + " sents/sec)."); */ if (numFallback > 0) { pwErr.println(" " + numFallback + " sentences were parsed by fallback to PCFG."); } if (numUnparsable > 0 || numNoMemory > 0 || numSkipped > 0) { pwErr.println(" " + (numUnparsable + numNoMemory + numSkipped) + " sentences were not parsed:"); if (numUnparsable > 0) { pwErr.println(" " + numUnparsable + " were not parsable with non-zero probability."); } if (numNoMemory > 0) { pwErr.println(" " + numNoMemory + " were skipped because of insufficient memory."); } if (numSkipped > 0) { pwErr.println(" " + numSkipped + " were skipped as length 0 or greater than " + Test.maxLength); } } return treeList; }