List of usage examples for the edu.stanford.nlp.io.RuntimeIOException constructor:
public RuntimeIOException(Throwable cause)
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private double[][] readEmbedFile(String embedFile, Map<String, Integer> embedID) { double[][] embeddings = null; if (embedFile != null) { BufferedReader input = null; try {//from w w w . java2s . co m input = IOUtils.readerFromString(embedFile); List<String> lines = new ArrayList<String>(); for (String s; (s = input.readLine()) != null;) { lines.add(s); } int nWords = lines.size(); String[] splits = lines.get(0).split("\\s+"); int dim = splits.length - 1; embeddings = new double[nWords][dim]; System.err.println("Embedding File " + embedFile + ": #Words = " + nWords + ", dim = " + dim); if (dim != config.embeddingSize) throw new IllegalArgumentException( "The dimension of embedding file does not match config.embeddingSize"); for (int i = 0; i < lines.size(); ++i) { splits = lines.get(i).split("\\s+"); embedID.put(splits[0], i); for (int j = 0; j < dim; ++j) embeddings[i][j] = Double.parseDouble(splits[j + 1]); } } catch (IOException e) { throw new RuntimeIOException(e); } finally { IOUtils.closeIgnoringExceptions(input); } } return embeddings; }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private void setupClassifierForTraining(List<CCGJSentence> trainSents, List<CCGJTreeNode> trainTrees, String embedFile, String preModel) throws IOException { double[][] E = new double[knownWords.size() + knownPos.size() + knownCCGCats.size()][config.embeddingSize]; double[][] W1 = new double[config.hiddenSize][config.embeddingSize * config.numTokens]; double[] b1 = new double[config.hiddenSize]; double[][] W2 = new double[actsList.size()][config.hiddenSize]; // Randomly initialize weight matrices / vectors Random random = Util.getRandom(); for (int i = 0; i < W1.length; ++i) for (int j = 0; j < W1[i].length; ++j) W1[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange; for (int i = 0; i < b1.length; ++i) b1[i] = random.nextDouble() * 2 * config.initRange - config.initRange; for (int i = 0; i < W2.length; ++i) for (int j = 0; j < W2[i].length; ++j) W2[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange; // Read embeddings into `embedID`, `embeddings` Map<String, Integer> embedID = new HashMap<String, Integer>(); double[][] embeddings = readEmbedFile(embedFile, embedID); // Try to match loaded embeddings with words in dictionary int foundEmbed = 0; for (int i = 0; i < E.length; ++i) { int index = -1; if (i < knownWords.size()) { String str = knownWords.get(i); //NOTE: exact match first, and then try lower case.. if (embedID.containsKey(str)) index = embedID.get(str); else if (embedID.containsKey(str.toLowerCase())) index = embedID.get(str.toLowerCase()); }//w ww .j a v a 2s . c om if (index >= 0) { ++foundEmbed; for (int j = 0; j < E[i].length; ++j) E[i][j] = embeddings[index][j]; } else { for (int j = 0; j < E[i].length; ++j) E[i][j] = random.nextDouble() * 0.02 - 0.01; } } System.err.println("Found embeddings: " + foundEmbed + " / " + knownWords.size()); if (preModel != null) { try { System.err.println("Loading pre-trained model file: " + preModel + " ... 
"); String s; BufferedReader input = IOUtils.readerFromString(preModel); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nLabel = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); String[] splits; for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); if (wordIDs.containsKey(splits[0]) && eSize == config.embeddingSize) { int index = getWordID(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); } } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); if (posIDs.containsKey(splits[0]) && eSize == config.embeddingSize) { int index = getPosID(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); } } for (int k = 0; k < nLabel; ++k) { s = input.readLine(); splits = s.split(" "); if (ccgcatIDs.containsKey(splits[0]) && eSize == config.embeddingSize) { int index = getCCGCatID(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); } } boolean copyLayer1 = hSize == config.hiddenSize && config.embeddingSize == eSize && config.numTokens == nTokens; if (copyLayer1) { System.err.println("Copying parameters W1 && b1..."); } for (int j = 0; j < eSize * nTokens; ++j) { s = input.readLine(); if (copyLayer1) { splits = s.split(" "); for (int i = 0; i < hSize; ++i) W1[i][j] = Double.parseDouble(splits[i]); } } s = input.readLine(); if (copyLayer1) { splits = s.split(" "); for (int i = 0; i < hSize; ++i) b1[i] = Double.parseDouble(splits[i]); } boolean copyLayer2 = (nLabel * 2 - 1 == actsList.size()) 
&& hSize == config.hiddenSize; if (copyLayer2) System.err.println("Copying parameters W2..."); for (int j = 0; j < hSize; ++j) { s = input.readLine(); if (copyLayer2) { splits = s.split(" "); for (int i = 0; i < nLabel * 2 - 1; ++i) W2[i][j] = Double.parseDouble(splits[i]); } } input.close(); } catch (IOException e) { throw new RuntimeIOException(e); } } Dataset trainSet = genTrainExamples(trainSents, trainTrees); classifier = new Classifier(config, trainSet, E, W1, b1, W2, preComputed); }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private void loadModelFile(String modelFile, boolean verbose) throws IOException { Timing t = new Timing(); try {//from w w w. ja v a 2 s.com System.err.println("Loading ccg parser model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); actsMap = new HashMap<>(); knownWords = new ArrayList<>(); knownPos = new ArrayList<>(); knownCCGCats = new ArrayList<>(); srparser = new NonInc(); double[][] E = new double[nDict + nPOS + nccgCat][eSize]; String[] splits; int index = 0; for (int k = 0; k < classes; k++) { s = input.readLine().trim(); splits = s.split("--"); actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2], RuleType.valueOf(splits[3])), k); } for (int k = 0; k < nuRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null, 
CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addUnaryRuleInfo(info, key); } } for (int k = 0; k < nbRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addBinaryRuleInfo(info, key); } } for (int k = 0; k < nrRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addRevealRuleInfo(info, key); } } for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); knownWords.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); knownPos.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nccgCat; ++k) { s = input.readLine(); splits = s.split(" "); knownCCGCats.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } generateIDs(); double[][] W1 = new double[hSize][eSize * nTokens]; for (int j = 0; j < W1[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W1.length; ++i) W1[i][j] = Double.parseDouble(splits[i]); } double[] 
b1 = new double[hSize]; s = input.readLine(); splits = s.split(" "); for (int i = 0; i < b1.length; ++i) b1[i] = Double.parseDouble(splits[i]); double[][] W2 = new double[classes][hSize]; for (int j = 0; j < W2[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W2.length; ++i) W2[i][j] = Double.parseDouble(splits[i]); } preComputed = new ArrayList<Integer>(); while (preComputed.size() < nPreComputed) { s = input.readLine(); splits = s.split(" "); for (String split : splits) { preComputed.add(Integer.parseInt(split)); } } input.close(); classifier = new Classifier(config, E, W1, b1, W2, preComputed); } catch (IOException e) { throw new RuntimeIOException(e); } // initialize the loaded parser // Pre-compute matrix multiplications if (config.numPreComputed > 0) { classifier.preCompute(); } t.done("Initializing ccg parser"); }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
public void writeModelFile(String modelFile) { try {//from w w w. j a va 2 s . c o m double[][] W1 = classifier.getW1(); double[] b1 = classifier.getb1(); double[][] W2 = classifier.getW2(); double[][] E = classifier.getE(); Writer output = IOUtils.getPrintWriter(modelFile); HashMap<String, ArrayList<CCGJRuleInfo>> uRules = srparser.treebankRules.getUnaryRules(); HashMap<String, ArrayList<CCGJRuleInfo>> bRules = srparser.treebankRules.getBinaryRules(); HashMap<String, ArrayList<CCGJRuleInfo>> rRules = srparser.treebankRules.getRevealRules(); output.write("dict=" + knownWords.size() + "\n"); output.write("pos=" + knownPos.size() + "\n"); output.write("ccg cats=" + knownCCGCats.size() + "\n"); output.write("embeddingSize=" + E[0].length + "\n"); output.write("hiddenSize=" + b1.length + "\n"); output.write("numTokens=" + (W1[0].length / E[0].length) + "\n"); output.write("preComputed=" + preComputed.size() + "\n"); output.write("classes=" + actsMap.size() + "\n"); output.write("UnaryRules=" + uRules.size() + "\n"); output.write("BinaryRules=" + bRules.size() + "\n"); output.write("RevealRules=" + rRules.size() + "\n"); int index = 0; // Classes for (ArcJAction act : actsList) output.write(act.toString() + "\n"); // Unary and Binary Rules for (String key : uRules.keySet()) { ArrayList<CCGJRuleInfo> list = uRules.get(key); output.write(key); for (CCGJRuleInfo info : list) { output.write(" " + info.toString()); } output.write("\n"); } for (String key : bRules.keySet()) { ArrayList<CCGJRuleInfo> list = bRules.get(key); output.write(key); for (CCGJRuleInfo info : list) { output.write(" " + info.toString()); } output.write("\n"); } for (String key : rRules.keySet()) { ArrayList<CCGJRuleInfo> list = rRules.get(key); output.write(key); for (CCGJRuleInfo info : list) { output.write(" " + info.toString()); } output.write("\n"); } // First write word / POS / label embeddings for (String word : knownWords) { output.write(word); for (int k = 0; k < E[index].length; ++k) 
output.write(" " + E[index][k]); output.write("\n"); index = index + 1; } for (String pos : knownPos) { output.write(pos); for (int k = 0; k < E[index].length; ++k) output.write(" " + E[index][k]); output.write("\n"); index = index + 1; } for (String label : knownCCGCats) { output.write(label); for (int k = 0; k < E[index].length; ++k) output.write(" " + E[index][k]); output.write("\n"); index = index + 1; } // Now write classifier weights for (int j = 0; j < W1[0].length; ++j) for (int i = 0; i < W1.length; ++i) { output.write("" + W1[i][j]); if (i == W1.length - 1) output.write("\n"); else output.write(" "); } for (int i = 0; i < b1.length; ++i) { output.write("" + b1[i]); if (i == b1.length - 1) output.write("\n"); else output.write(" "); } for (int j = 0; j < W2[0].length; ++j) for (int i = 0; i < W2.length; ++i) { output.write("" + W2[i][j]); if (i == W2.length - 1) output.write("\n"); else output.write(" "); } // Finish with pre-computation info for (int i = 0; i < preComputed.size(); ++i) { output.write("" + preComputed.get(i)); if ((i + 1) % 100 == 0 || i == preComputed.size() - 1) output.write("\n"); else output.write(" "); } output.close(); } catch (IOException e) { throw new RuntimeIOException(e); } }
From source file:ilcc.ccgparser.test.IncExtractProb.java
private void loadModelFile(String modelFile, boolean verbose) throws IOException { Timing t = new Timing(); try {/*from w ww. ja v a2 s. com*/ System.err.println("Loading ccg parser model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); actsMap = new HashMap<>(); knownWords = new ArrayList<>(); knownPos = new ArrayList<>(); knownCCGCats = new ArrayList<>(); double[][] E = new double[nDict + nPOS + nccgCat][eSize]; String[] splits; int index = 0; for (int k = 0; k < classes; k++) { s = input.readLine().trim(); splits = s.split("--"); actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2], RuleType.valueOf(splits[3])), k); } for (int k = 0; k < nuRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null, CCGcat.ccgCatFromString(parts[2]), 
parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addUnaryRuleInfo(info, key); } } for (int k = 0; k < nbRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addBinaryRuleInfo(info, key); } } for (int k = 0; k < nrRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addRevealRuleInfo(info, key); } } for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); knownWords.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); knownPos.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nccgCat; ++k) { s = input.readLine(); splits = s.split(" "); knownCCGCats.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } generateIDs(); double[][] W1 = new double[hSize][eSize * nTokens]; for (int j = 0; j < W1[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W1.length; ++i) W1[i][j] = Double.parseDouble(splits[i]); } double[] b1 = new double[hSize]; s = 
input.readLine(); splits = s.split(" "); for (int i = 0; i < b1.length; ++i) b1[i] = Double.parseDouble(splits[i]); double[][] W2 = new double[classes][hSize]; for (int j = 0; j < W2[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W2.length; ++i) W2[i][j] = Double.parseDouble(splits[i]); } preComputed = new ArrayList<Integer>(); while (preComputed.size() < nPreComputed) { s = input.readLine(); splits = s.split(" "); for (String split : splits) { preComputed.add(Integer.parseInt(split)); } } input.close(); classifier = new Classifier(config, E, W1, b1, W2, preComputed); } catch (IOException e) { throw new RuntimeIOException(e); } // initialize the loaded parser // Pre-compute matrix multiplications if (config.numPreComputed > 0) { classifier.preCompute(); } t.done("Initializing ccg parser"); }
From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java
License:Open Source License
/**
 * Returns the next ACE document, or {@code null} when the file list is
 * exhausted. Advances {@code fileIndex} past non-matching files until one
 * containing "apf.xml" is found, parses and annotates it, extracts gold
 * (and, unless Constants.USE_GOLD_MENTIONS, predicted) mentions, prints
 * both raw views, and assembles the {@code Document} via {@code arrange}.
 *
 * @return the next {@code Document}, or {@code null} if no files remain
 * @throws RuntimeIOException if reading/parsing the file fails with IOException
 * @throws Exception          propagated from annotation/mention extraction
 */
public Document nextDoc() throws Exception {
    List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
    List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
    List<List<Mention>> allPredictedMentions;
    List<Tree> allTrees = new ArrayList<Tree>();
    Annotation anno;
    try {
        // Scan forward for the next "apf.xml" file; fileIndex persists across calls.
        String filename = "";
        while (files.length > fileIndex) {
            if (files[fileIndex].contains("apf.xml")) {
                filename = files[fileIndex];
                fileIndex++;
                break;
            } else {
                fileIndex++;
                filename = "";
            }
        }
        // No matching file left: signal end of corpus.
        if (files.length <= fileIndex && filename.equals(""))
            return null;
        anno = aceReader.parse(corpusPath + filename);
        stanfordProcessor.annotate(anno);
        List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap s : sentences) {
            // Re-index tokens from 1 within each sentence; default utterance to 0.
            int i = 1;
            for (CoreLabel w : s.get(CoreAnnotations.TokensAnnotation.class)) {
                w.set(CoreAnnotations.IndexAnnotation.class, i++);
                if (!w.containsKey(CoreAnnotations.UtteranceAnnotation.class)) {
                    w.set(CoreAnnotations.UtteranceAnnotation.class, 0);
                }
            }
            allTrees.add(s.get(TreeCoreAnnotations.TreeAnnotation.class));
            allWords.add(s.get(CoreAnnotations.TokensAnnotation.class));
            EntityComparator comparator = new EntityComparator();
            extractGoldMentions(s, allGoldMentions, comparator);
        }
        // Either reuse gold mentions as predictions, or run the mention finder.
        if (Constants.USE_GOLD_MENTIONS)
            allPredictedMentions = allGoldMentions;
        else
            allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
        printRawDoc(sentences, allGoldMentions, filename, true);
        printRawDoc(sentences, allPredictedMentions, filename, false);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
}
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
public static LogisticClassifier<String, String> getSingletonPredictorFromSerializedFile( String serializedFile) {//from www .j a v a 2 s . c om try { ObjectInputStream ois = IOUtils.readStreamFromString(serializedFile); Object o = ois.readObject(); if (o instanceof LogisticClassifier<?, ?>) { return (LogisticClassifier<String, String>) o; } throw new ClassCastException("Wanted SingletonPredictor, got " + o.getClass()); } catch (IOException e) { throw new RuntimeIOException(e); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } }
From source file:process.PTBTokenizer.java
License:Open Source License
/**
 * Internally fetches the next token from the underlying lexer.
 *
 * Any {@code IOException} from the lexer is wrapped in an unchecked
 * {@code RuntimeIOException} so iteration code need not declare it.
 * CR/newline filtering is not done here: the lexer itself decides whether
 * to return CRs based on its own flag.
 *
 * @return the next token in the token stream, or null if none exists.
 */
@Override
@SuppressWarnings("unchecked")
protected T getNext() {
    try {
        return (T) lexer.next();
    } catch (IOException e) {
        // Deliberately unchecked: swallowing here historically hid lexer bugs
        // and made them very hard to debug.
        throw new RuntimeIOException(e);
    }
}