List of usage examples for the edu.stanford.nlp.io.RuntimeIOException constructor:
public RuntimeIOException(Throwable cause)
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private double[][] readEmbedFile(String embedFile, Map<String, Integer> embedID) { double[][] embeddings = null; if (embedFile != null) { BufferedReader input = null; try {//from w w w . java2s . co m input = IOUtils.readerFromString(embedFile); List<String> lines = new ArrayList<String>(); for (String s; (s = input.readLine()) != null;) { lines.add(s); } int nWords = lines.size(); String[] splits = lines.get(0).split("\\s+"); int dim = splits.length - 1; embeddings = new double[nWords][dim]; System.err.println("Embedding File " + embedFile + ": #Words = " + nWords + ", dim = " + dim); if (dim != config.embeddingSize) throw new IllegalArgumentException( "The dimension of embedding file does not match config.embeddingSize"); for (int i = 0; i < lines.size(); ++i) { splits = lines.get(i).split("\\s+"); embedID.put(splits[0], i); for (int j = 0; j < dim; ++j) embeddings[i][j] = Double.parseDouble(splits[j + 1]); } } catch (IOException e) { throw new RuntimeIOException(e); } finally { IOUtils.closeIgnoringExceptions(input); } } return embeddings; }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private void setupClassifierForTraining(List<CCGJSentence> trainSents, List<CCGJTreeNode> trainTrees, String embedFile, String preModel) throws IOException { double[][] E = new double[knownWords.size() + knownPos.size() + knownCCGCats.size()][config.embeddingSize]; double[][] W1 = new double[config.hiddenSize][config.embeddingSize * config.numTokens]; double[] b1 = new double[config.hiddenSize]; double[][] W2 = new double[actsList.size()][config.hiddenSize]; // Randomly initialize weight matrices / vectors Random random = Util.getRandom(); for (int i = 0; i < W1.length; ++i) for (int j = 0; j < W1[i].length; ++j) W1[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange; for (int i = 0; i < b1.length; ++i) b1[i] = random.nextDouble() * 2 * config.initRange - config.initRange; for (int i = 0; i < W2.length; ++i) for (int j = 0; j < W2[i].length; ++j) W2[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange; // Read embeddings into `embedID`, `embeddings` Map<String, Integer> embedID = new HashMap<String, Integer>(); double[][] embeddings = readEmbedFile(embedFile, embedID); // Try to match loaded embeddings with words in dictionary int foundEmbed = 0; for (int i = 0; i < E.length; ++i) { int index = -1; if (i < knownWords.size()) { String str = knownWords.get(i); //NOTE: exact match first, and then try lower case.. if (embedID.containsKey(str)) index = embedID.get(str); else if (embedID.containsKey(str.toLowerCase())) index = embedID.get(str.toLowerCase()); }//w ww .j a v a 2s . c om if (index >= 0) { ++foundEmbed; for (int j = 0; j < E[i].length; ++j) E[i][j] = embeddings[index][j]; } else { for (int j = 0; j < E[i].length; ++j) E[i][j] = random.nextDouble() * 0.02 - 0.01; } } System.err.println("Found embeddings: " + foundEmbed + " / " + knownWords.size()); if (preModel != null) { try { System.err.println("Loading pre-trained model file: " + preModel + " ... 
"); String s; BufferedReader input = IOUtils.readerFromString(preModel); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nLabel = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); String[] splits; for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); if (wordIDs.containsKey(splits[0]) && eSize == config.embeddingSize) { int index = getWordID(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); } } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); if (posIDs.containsKey(splits[0]) && eSize == config.embeddingSize) { int index = getPosID(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); } } for (int k = 0; k < nLabel; ++k) { s = input.readLine(); splits = s.split(" "); if (ccgcatIDs.containsKey(splits[0]) && eSize == config.embeddingSize) { int index = getCCGCatID(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); } } boolean copyLayer1 = hSize == config.hiddenSize && config.embeddingSize == eSize && config.numTokens == nTokens; if (copyLayer1) { System.err.println("Copying parameters W1 && b1..."); } for (int j = 0; j < eSize * nTokens; ++j) { s = input.readLine(); if (copyLayer1) { splits = s.split(" "); for (int i = 0; i < hSize; ++i) W1[i][j] = Double.parseDouble(splits[i]); } } s = input.readLine(); if (copyLayer1) { splits = s.split(" "); for (int i = 0; i < hSize; ++i) b1[i] = Double.parseDouble(splits[i]); } boolean copyLayer2 = (nLabel * 2 - 1 == actsList.size()) 
&& hSize == config.hiddenSize; if (copyLayer2) System.err.println("Copying parameters W2..."); for (int j = 0; j < hSize; ++j) { s = input.readLine(); if (copyLayer2) { splits = s.split(" "); for (int i = 0; i < nLabel * 2 - 1; ++i) W2[i][j] = Double.parseDouble(splits[i]); } } input.close(); } catch (IOException e) { throw new RuntimeIOException(e); } } Dataset trainSet = genTrainExamples(trainSents, trainTrees); classifier = new Classifier(config, trainSet, E, W1, b1, W2, preComputed); }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
private void loadModelFile(String modelFile, boolean verbose) throws IOException { Timing t = new Timing(); try {//from w w w. ja v a 2 s.com System.err.println("Loading ccg parser model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); actsMap = new HashMap<>(); knownWords = new ArrayList<>(); knownPos = new ArrayList<>(); knownCCGCats = new ArrayList<>(); srparser = new NonInc(); double[][] E = new double[nDict + nPOS + nccgCat][eSize]; String[] splits; int index = 0; for (int k = 0; k < classes; k++) { s = input.readLine().trim(); splits = s.split("--"); actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2], RuleType.valueOf(splits[3])), k); } for (int k = 0; k < nuRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null, 
CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addUnaryRuleInfo(info, key); } } for (int k = 0; k < nbRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addBinaryRuleInfo(info, key); } } for (int k = 0; k < nrRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addRevealRuleInfo(info, key); } } for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); knownWords.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); knownPos.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nccgCat; ++k) { s = input.readLine(); splits = s.split(" "); knownCCGCats.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } generateIDs(); double[][] W1 = new double[hSize][eSize * nTokens]; for (int j = 0; j < W1[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W1.length; ++i) W1[i][j] = Double.parseDouble(splits[i]); } double[] 
b1 = new double[hSize]; s = input.readLine(); splits = s.split(" "); for (int i = 0; i < b1.length; ++i) b1[i] = Double.parseDouble(splits[i]); double[][] W2 = new double[classes][hSize]; for (int j = 0; j < W2[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W2.length; ++i) W2[i][j] = Double.parseDouble(splits[i]); } preComputed = new ArrayList<Integer>(); while (preComputed.size() < nPreComputed) { s = input.readLine(); splits = s.split(" "); for (String split : splits) { preComputed.add(Integer.parseInt(split)); } } input.close(); classifier = new Classifier(config, E, W1, b1, W2, preComputed); } catch (IOException e) { throw new RuntimeIOException(e); } // initialize the loaded parser // Pre-compute matrix multiplications if (config.numPreComputed > 0) { classifier.preCompute(); } t.done("Initializing ccg parser"); }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
public void writeModelFile(String modelFile) { try {//from w w w. j a va 2 s . c o m double[][] W1 = classifier.getW1(); double[] b1 = classifier.getb1(); double[][] W2 = classifier.getW2(); double[][] E = classifier.getE(); Writer output = IOUtils.getPrintWriter(modelFile); HashMap<String, ArrayList<CCGJRuleInfo>> uRules = srparser.treebankRules.getUnaryRules(); HashMap<String, ArrayList<CCGJRuleInfo>> bRules = srparser.treebankRules.getBinaryRules(); HashMap<String, ArrayList<CCGJRuleInfo>> rRules = srparser.treebankRules.getRevealRules(); output.write("dict=" + knownWords.size() + "\n"); output.write("pos=" + knownPos.size() + "\n"); output.write("ccg cats=" + knownCCGCats.size() + "\n"); output.write("embeddingSize=" + E[0].length + "\n"); output.write("hiddenSize=" + b1.length + "\n"); output.write("numTokens=" + (W1[0].length / E[0].length) + "\n"); output.write("preComputed=" + preComputed.size() + "\n"); output.write("classes=" + actsMap.size() + "\n"); output.write("UnaryRules=" + uRules.size() + "\n"); output.write("BinaryRules=" + bRules.size() + "\n"); output.write("RevealRules=" + rRules.size() + "\n"); int index = 0; // Classes for (ArcJAction act : actsList) output.write(act.toString() + "\n"); // Unary and Binary Rules for (String key : uRules.keySet()) { ArrayList<CCGJRuleInfo> list = uRules.get(key); output.write(key); for (CCGJRuleInfo info : list) { output.write(" " + info.toString()); } output.write("\n"); } for (String key : bRules.keySet()) { ArrayList<CCGJRuleInfo> list = bRules.get(key); output.write(key); for (CCGJRuleInfo info : list) { output.write(" " + info.toString()); } output.write("\n"); } for (String key : rRules.keySet()) { ArrayList<CCGJRuleInfo> list = rRules.get(key); output.write(key); for (CCGJRuleInfo info : list) { output.write(" " + info.toString()); } output.write("\n"); } // First write word / POS / label embeddings for (String word : knownWords) { output.write(word); for (int k = 0; k < E[index].length; ++k) 
output.write(" " + E[index][k]); output.write("\n"); index = index + 1; } for (String pos : knownPos) { output.write(pos); for (int k = 0; k < E[index].length; ++k) output.write(" " + E[index][k]); output.write("\n"); index = index + 1; } for (String label : knownCCGCats) { output.write(label); for (int k = 0; k < E[index].length; ++k) output.write(" " + E[index][k]); output.write("\n"); index = index + 1; } // Now write classifier weights for (int j = 0; j < W1[0].length; ++j) for (int i = 0; i < W1.length; ++i) { output.write("" + W1[i][j]); if (i == W1.length - 1) output.write("\n"); else output.write(" "); } for (int i = 0; i < b1.length; ++i) { output.write("" + b1[i]); if (i == b1.length - 1) output.write("\n"); else output.write(" "); } for (int j = 0; j < W2[0].length; ++j) for (int i = 0; i < W2.length; ++i) { output.write("" + W2[i][j]); if (i == W2.length - 1) output.write("\n"); else output.write(" "); } // Finish with pre-computation info for (int i = 0; i < preComputed.size(); ++i) { output.write("" + preComputed.get(i)); if ((i + 1) % 100 == 0 || i == preComputed.size() - 1) output.write("\n"); else output.write(" "); } output.close(); } catch (IOException e) { throw new RuntimeIOException(e); } }
From source file:ilcc.ccgparser.test.IncExtractProb.java
private void loadModelFile(String modelFile, boolean verbose) throws IOException { Timing t = new Timing(); try {/*from w ww. ja v a2 s. com*/ System.err.println("Loading ccg parser model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); s = input.readLine(); int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); s = input.readLine(); int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1)); actsMap = new HashMap<>(); knownWords = new ArrayList<>(); knownPos = new ArrayList<>(); knownCCGCats = new ArrayList<>(); double[][] E = new double[nDict + nPOS + nccgCat][eSize]; String[] splits; int index = 0; for (int k = 0; k < classes; k++) { s = input.readLine().trim(); splits = s.split("--"); actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2], RuleType.valueOf(splits[3])), k); } for (int k = 0; k < nuRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null, CCGcat.ccgCatFromString(parts[2]), 
parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addUnaryRuleInfo(info, key); } } for (int k = 0; k < nbRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addBinaryRuleInfo(info, key); } } for (int k = 0; k < nrRules; k++) { s = input.readLine().trim(); splits = s.split(" "); String key = splits[0]; for (int i = 1; i < splits.length; i++) { String[] parts = splits[i].split("--"); CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0); srparser.treebankRules.addRevealRuleInfo(info, key); } } for (int k = 0; k < nDict; ++k) { s = input.readLine(); splits = s.split(" "); knownWords.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nPOS; ++k) { s = input.readLine(); splits = s.split(" "); knownPos.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } for (int k = 0; k < nccgCat; ++k) { s = input.readLine(); splits = s.split(" "); knownCCGCats.add(splits[0]); for (int i = 0; i < eSize; ++i) E[index][i] = Double.parseDouble(splits[i + 1]); index = index + 1; } generateIDs(); double[][] W1 = new double[hSize][eSize * nTokens]; for (int j = 0; j < W1[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W1.length; ++i) W1[i][j] = Double.parseDouble(splits[i]); } double[] b1 = new double[hSize]; s = 
input.readLine(); splits = s.split(" "); for (int i = 0; i < b1.length; ++i) b1[i] = Double.parseDouble(splits[i]); double[][] W2 = new double[classes][hSize]; for (int j = 0; j < W2[0].length; ++j) { s = input.readLine(); splits = s.split(" "); for (int i = 0; i < W2.length; ++i) W2[i][j] = Double.parseDouble(splits[i]); } preComputed = new ArrayList<Integer>(); while (preComputed.size() < nPreComputed) { s = input.readLine(); splits = s.split(" "); for (String split : splits) { preComputed.add(Integer.parseInt(split)); } } input.close(); classifier = new Classifier(config, E, W1, b1, W2, preComputed); } catch (IOException e) { throw new RuntimeIOException(e); } // initialize the loaded parser // Pre-compute matrix multiplications if (config.numPreComputed > 0) { classifier.preCompute(); } t.done("Initializing ccg parser"); }
From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java
License:Open Source License
/**
 * Returns the next ACE document, or {@code null} when the file list is
 * exhausted. Advances {@code fileIndex} past non-matching files until one
 * containing "apf.xml" is found, parses and annotates it, extracts gold
 * (and, unless Constants.USE_GOLD_MENTIONS, predicted) mentions, prints
 * both raw views, and assembles the {@code Document} via {@code arrange}.
 *
 * @return the next {@code Document}, or {@code null} if no files remain
 * @throws RuntimeIOException if reading/parsing the file fails with IOException
 * @throws Exception          propagated from annotation/mention extraction
 */
public Document nextDoc() throws Exception {
    List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
    List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
    List<List<Mention>> allPredictedMentions;
    List<Tree> allTrees = new ArrayList<Tree>();
    Annotation anno;
    try {
        // Scan forward for the next "apf.xml" file; fileIndex persists across calls.
        String filename = "";
        while (files.length > fileIndex) {
            if (files[fileIndex].contains("apf.xml")) {
                filename = files[fileIndex];
                fileIndex++;
                break;
            } else {
                fileIndex++;
                filename = "";
            }
        }
        // No matching file left: signal end of corpus.
        if (files.length <= fileIndex && filename.equals(""))
            return null;
        anno = aceReader.parse(corpusPath + filename);
        stanfordProcessor.annotate(anno);
        List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap s : sentences) {
            // Re-index tokens from 1 within each sentence; default utterance to 0.
            int i = 1;
            for (CoreLabel w : s.get(CoreAnnotations.TokensAnnotation.class)) {
                w.set(CoreAnnotations.IndexAnnotation.class, i++);
                if (!w.containsKey(CoreAnnotations.UtteranceAnnotation.class)) {
                    w.set(CoreAnnotations.UtteranceAnnotation.class, 0);
                }
            }
            allTrees.add(s.get(TreeCoreAnnotations.TreeAnnotation.class));
            allWords.add(s.get(CoreAnnotations.TokensAnnotation.class));
            EntityComparator comparator = new EntityComparator();
            extractGoldMentions(s, allGoldMentions, comparator);
        }
        // Either reuse gold mentions as predictions, or run the mention finder.
        if (Constants.USE_GOLD_MENTIONS)
            allPredictedMentions = allGoldMentions;
        else
            allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
        printRawDoc(sentences, allGoldMentions, filename, true);
        printRawDoc(sentences, allPredictedMentions, filename, false);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
}
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
public static LogisticClassifier<String, String> getSingletonPredictorFromSerializedFile( String serializedFile) {//from www .j a v a 2 s . c om try { ObjectInputStream ois = IOUtils.readStreamFromString(serializedFile); Object o = ois.readObject(); if (o instanceof LogisticClassifier<?, ?>) { return (LogisticClassifier<String, String>) o; } throw new ClassCastException("Wanted SingletonPredictor, got " + o.getClass()); } catch (IOException e) { throw new RuntimeIOException(e); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } }
From source file:process.PTBTokenizer.java
License:Open Source License
/**
 * Internally fetches the next token from the underlying lexer.
 *
 * Any {@code IOException} from the lexer is wrapped in an unchecked
 * {@code RuntimeIOException} so iteration code need not declare it.
 * CR/newline filtering is not done here: the lexer itself decides whether
 * to return CRs based on its own flag.
 *
 * @return the next token in the token stream, or null if none exists.
 */
@Override
@SuppressWarnings("unchecked")
protected T getNext() {
    try {
        return (T) lexer.next();
    } catch (IOException e) {
        // Deliberately unchecked: swallowing here historically hid lexer bugs
        // and made them very hard to debug.
        throw new RuntimeIOException(e);
    }
}