Example usage for edu.stanford.nlp.io RuntimeIOException RuntimeIOException

List of usage examples for edu.stanford.nlp.io RuntimeIOException RuntimeIOException

Introduction

On this page you can find an example usage for edu.stanford.nlp.io RuntimeIOException RuntimeIOException.

Prototype

public RuntimeIOException(Throwable cause) 

Source Link

Document

Creates a new exception with an embedded cause.

Usage

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Reads a whitespace-separated word-embedding file into a dense matrix.
 * Each line is expected to hold a word followed by its vector components;
 * the dimensionality is taken from the first line and must equal
 * {@code config.embeddingSize}. Word-to-row positions are recorded in
 * {@code embedID}.
 *
 * @param embedFile path of the embedding file; may be null, in which case
 *                  null is returned
 * @param embedID   out-parameter mapping each word to its row index
 * @return the embedding matrix, or null when no file was given
 * @throws RuntimeIOException if reading the file fails
 */
private double[][] readEmbedFile(String embedFile, Map<String, Integer> embedID) {

    double[][] vectors = null;
    if (embedFile != null) {
        BufferedReader reader = null;
        try {
            reader = IOUtils.readerFromString(embedFile);
            List<String> rows = new ArrayList<String>();
            String line;
            while ((line = reader.readLine()) != null) {
                rows.add(line);
            }

            int wordCount = rows.size();
            // Vector dimensionality = token count of the first row minus the word itself.
            int dim = rows.get(0).split("\\s+").length - 1;
            vectors = new double[wordCount][dim];
            System.err.println("Embedding File " + embedFile + ": #Words = " + wordCount + ", dim = " + dim);

            if (dim != config.embeddingSize)
                throw new IllegalArgumentException(
                        "The dimension of embedding file does not match config.embeddingSize");

            for (int row = 0; row < rows.size(); ++row) {
                String[] tokens = rows.get(row).split("\\s+");
                embedID.put(tokens[0], row);
                for (int col = 0; col < dim; ++col)
                    vectors[row][col] = Double.parseDouble(tokens[col + 1]);
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        } finally {
            IOUtils.closeIgnoringExceptions(reader);
        }
    }
    return vectors;
}

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Initialises the classifier parameters (E, W1, b1, W2) for training.
 * <p>
 * W1, b1 and W2 are drawn uniformly from [-initRange, +initRange]. The
 * embedding matrix E is seeded from {@code embedFile} where a known word
 * matches (exact match first, then lower-cased), otherwise from small
 * random values in [-0.01, 0.01). If {@code preModel} is given, parameter
 * blocks whose dimensions match the current configuration are copied from
 * that previously trained model.
 *
 * @param trainSents training sentences
 * @param trainTrees training trees, parallel to {@code trainSents}
 * @param embedFile  optional word-embedding file; may be null
 * @param preModel   optional pre-trained model file; may be null
 * @throws IOException declared for callers; failures while reading the
 *                     pre-trained model are rethrown as RuntimeIOException
 */
private void setupClassifierForTraining(List<CCGJSentence> trainSents, List<CCGJTreeNode> trainTrees,
        String embedFile, String preModel) throws IOException {
    double[][] E = new double[knownWords.size() + knownPos.size() + knownCCGCats.size()][config.embeddingSize];
    double[][] W1 = new double[config.hiddenSize][config.embeddingSize * config.numTokens];
    double[] b1 = new double[config.hiddenSize];
    double[][] W2 = new double[actsList.size()][config.hiddenSize];

    // Randomly initialize weight matrices / vectors
    Random random = Util.getRandom();
    for (int i = 0; i < W1.length; ++i)
        for (int j = 0; j < W1[i].length; ++j)
            W1[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange;

    for (int i = 0; i < b1.length; ++i)
        b1[i] = random.nextDouble() * 2 * config.initRange - config.initRange;

    for (int i = 0; i < W2.length; ++i)
        for (int j = 0; j < W2[i].length; ++j)
            W2[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange;

    // Read embeddings into `embedID`, `embeddings`
    Map<String, Integer> embedID = new HashMap<String, Integer>();
    double[][] embeddings = readEmbedFile(embedFile, embedID);

    // Try to match loaded embeddings with words in dictionary
    int foundEmbed = 0;
    for (int i = 0; i < E.length; ++i) {
        int index = -1;
        if (i < knownWords.size()) {
            String str = knownWords.get(i);
            // NOTE: exact match first, and then try lower case.
            if (embedID.containsKey(str))
                index = embedID.get(str);
            else if (embedID.containsKey(str.toLowerCase()))
                index = embedID.get(str.toLowerCase());
        }
        if (index >= 0) {
            ++foundEmbed;
            for (int j = 0; j < E[i].length; ++j)
                E[i][j] = embeddings[index][j];
        } else {
            // No pre-trained vector: initialise with small random values.
            for (int j = 0; j < E[i].length; ++j)
                E[i][j] = random.nextDouble() * 0.02 - 0.01;
        }
    }
    System.err.println("Found embeddings: " + foundEmbed + " / " + knownWords.size());

    if (preModel != null) {
        System.err.println("Loading pre-trained model file: " + preModel + " ... ");
        // try-with-resources guarantees the reader is closed even when a
        // read fails part-way through (it previously leaked on that path).
        try (BufferedReader input = IOUtils.readerFromString(preModel)) {
            String s;

            // Header: six "key=value" lines describing the stored model.
            s = input.readLine();
            int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int nLabel = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine(); // skipped line (separator/unused header entry)

            // Copy embeddings for entries known to the current model, but
            // only when the stored vector size matches the configuration.
            String[] splits;
            for (int k = 0; k < nDict; ++k) {
                s = input.readLine();
                splits = s.split(" ");
                if (wordIDs.containsKey(splits[0]) && eSize == config.embeddingSize) {
                    int index = getWordID(splits[0]);
                    for (int i = 0; i < eSize; ++i)
                        E[index][i] = Double.parseDouble(splits[i + 1]);
                }
            }

            for (int k = 0; k < nPOS; ++k) {
                s = input.readLine();
                splits = s.split(" ");
                if (posIDs.containsKey(splits[0]) && eSize == config.embeddingSize) {
                    int index = getPosID(splits[0]);
                    for (int i = 0; i < eSize; ++i)
                        E[index][i] = Double.parseDouble(splits[i + 1]);
                }
            }

            for (int k = 0; k < nLabel; ++k) {
                s = input.readLine();
                splits = s.split(" ");
                if (ccgcatIDs.containsKey(splits[0]) && eSize == config.embeddingSize) {
                    int index = getCCGCatID(splits[0]);
                    for (int i = 0; i < eSize; ++i)
                        E[index][i] = Double.parseDouble(splits[i + 1]);
                }
            }

            // Copy the hidden layer only when every dimension matches;
            // the stored lines must be consumed either way to stay aligned.
            boolean copyLayer1 = hSize == config.hiddenSize && config.embeddingSize == eSize
                    && config.numTokens == nTokens;
            if (copyLayer1) {
                System.err.println("Copying parameters W1 && b1...");
            }
            for (int j = 0; j < eSize * nTokens; ++j) {
                s = input.readLine();
                if (copyLayer1) {
                    splits = s.split(" ");
                    for (int i = 0; i < hSize; ++i)
                        W1[i][j] = Double.parseDouble(splits[i]);
                }
            }

            s = input.readLine();
            if (copyLayer1) {
                splits = s.split(" ");
                for (int i = 0; i < hSize; ++i)
                    b1[i] = Double.parseDouble(splits[i]);
            }

            // Output layer: copy only when class count and hidden size match.
            boolean copyLayer2 = (nLabel * 2 - 1 == actsList.size()) && hSize == config.hiddenSize;
            if (copyLayer2)
                System.err.println("Copying parameters W2...");
            for (int j = 0; j < hSize; ++j) {
                s = input.readLine();
                if (copyLayer2) {
                    splits = s.split(" ");
                    for (int i = 0; i < nLabel * 2 - 1; ++i)
                        W2[i][j] = Double.parseDouble(splits[i]);
                }
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }
    Dataset trainSet = genTrainExamples(trainSents, trainTrees);
    classifier = new Classifier(config, trainSet, E, W1, b1, W2, preComputed);
}

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Loads a serialised CCG parser model from {@code modelFile} and builds
 * the classifier (pre-computing activations afterwards if configured).
 * <p>
 * File layout: eleven "key=value" header lines (dict/POS/CCG-cat counts,
 * embedding and hidden sizes, token count, pre-computed count, class count,
 * unary/binary/reveal rule counts), then the class actions, the three rule
 * tables, the embedding rows (words, POS, CCG cats), W1 column-by-column,
 * b1, W2 column-by-column, and finally the pre-computed feature ids.
 *
 * @param modelFile path of the model file to read
 * @param verbose   currently unused; retained for interface compatibility
 * @throws IOException declared for callers; read failures are rethrown as
 *                     RuntimeIOException
 */
private void loadModelFile(String modelFile, boolean verbose) throws IOException {
    Timing t = new Timing();
    System.err.println("Loading ccg parser model file: " + modelFile + " ... ");
    // try-with-resources guarantees the reader is closed even when a read
    // fails part-way through (it previously leaked on the exception path).
    try (BufferedReader input = IOUtils.readerFromString(modelFile)) {
        String s;

        s = input.readLine();
        int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));

        actsMap = new HashMap<>();
        knownWords = new ArrayList<>();
        knownPos = new ArrayList<>();
        knownCCGCats = new ArrayList<>();
        srparser = new NonInc();

        double[][] E = new double[nDict + nPOS + nccgCat][eSize];
        String[] splits;
        int index = 0;

        // Parser actions: one "ACTION--arity--cat--ruleType" entry per class.
        for (int k = 0; k < classes; k++) {
            s = input.readLine().trim();
            splits = s.split("--");
            actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2],
                    RuleType.valueOf(splits[3])), k);
        }

        // Unary rules: key followed by double-space-separated rule entries.
        for (int k = 0; k < nuRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null,
                        CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]),
                        Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addUnaryRuleInfo(info, key);
            }
        }

        // Binary rules.
        for (int k = 0; k < nbRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addBinaryRuleInfo(info, key);
            }
        }

        // Reveal rules.
        for (int k = 0; k < nrRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addRevealRuleInfo(info, key);
            }
        }

        // Embedding rows: word/POS/CCG-cat entries share the matrix E,
        // stacked in that order (index runs over all three sections).
        for (int k = 0; k < nDict; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownWords.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nPOS; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownPos.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nccgCat; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownCCGCats.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        generateIDs();

        // W1 is stored column-by-column (one line per input column).
        double[][] W1 = new double[hSize][eSize * nTokens];
        for (int j = 0; j < W1[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W1.length; ++i)
                W1[i][j] = Double.parseDouble(splits[i]);
        }

        double[] b1 = new double[hSize];
        s = input.readLine();
        splits = s.split(" ");
        for (int i = 0; i < b1.length; ++i)
            b1[i] = Double.parseDouble(splits[i]);

        // W2 is also stored column-by-column.
        double[][] W2 = new double[classes][hSize];
        for (int j = 0; j < W2[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W2.length; ++i)
                W2[i][j] = Double.parseDouble(splits[i]);
        }

        // Pre-computed feature ids, wrapped over several lines.
        preComputed = new ArrayList<Integer>();
        while (preComputed.size() < nPreComputed) {
            s = input.readLine();
            splits = s.split(" ");
            for (String split : splits) {
                preComputed.add(Integer.parseInt(split));
            }
        }
        classifier = new Classifier(config, E, W1, b1, W2, preComputed);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }

    // Pre-compute matrix multiplications for frequent features.
    if (config.numPreComputed > 0) {
        classifier.preCompute();
    }
    t.done("Initializing ccg parser");
}

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Serialises the trained model to {@code modelFile}.
 * <p>
 * Layout mirrors {@code loadModelFile}: "key=value" header lines, the
 * class actions, the unary/binary/reveal rule tables, the embedding rows
 * (words, POS, CCG cats), W1 column-by-column, b1, W2 column-by-column,
 * and the pre-computed feature ids (100 per line).
 *
 * @param modelFile destination path
 * @throws RuntimeIOException if writing fails
 */
public void writeModelFile(String modelFile) {
    // try-with-resources guarantees the writer is closed even on failure
    // (it previously leaked on the exception path).
    try (Writer output = IOUtils.getPrintWriter(modelFile)) {
        double[][] W1 = classifier.getW1();
        double[] b1 = classifier.getb1();
        double[][] W2 = classifier.getW2();
        double[][] E = classifier.getE();

        HashMap<String, ArrayList<CCGJRuleInfo>> uRules = srparser.treebankRules.getUnaryRules();
        HashMap<String, ArrayList<CCGJRuleInfo>> bRules = srparser.treebankRules.getBinaryRules();
        HashMap<String, ArrayList<CCGJRuleInfo>> rRules = srparser.treebankRules.getRevealRules();

        // Header counts and dimensions.
        output.write("dict=" + knownWords.size() + "\n");
        output.write("pos=" + knownPos.size() + "\n");
        output.write("ccg cats=" + knownCCGCats.size() + "\n");
        output.write("embeddingSize=" + E[0].length + "\n");
        output.write("hiddenSize=" + b1.length + "\n");
        output.write("numTokens=" + (W1[0].length / E[0].length) + "\n");
        output.write("preComputed=" + preComputed.size() + "\n");
        output.write("classes=" + actsMap.size() + "\n");
        output.write("UnaryRules=" + uRules.size() + "\n");
        output.write("BinaryRules=" + bRules.size() + "\n");
        output.write("RevealRules=" + rRules.size() + "\n");
        int index = 0;

        // Classes
        for (ArcJAction act : actsList)
            output.write(act.toString() + "\n");

        // Unary, binary and reveal rules: key followed by its rule entries.
        for (String key : uRules.keySet()) {
            ArrayList<CCGJRuleInfo> list = uRules.get(key);
            output.write(key);
            for (CCGJRuleInfo info : list) {
                output.write("  " + info.toString());
            }
            output.write("\n");
        }
        for (String key : bRules.keySet()) {
            ArrayList<CCGJRuleInfo> list = bRules.get(key);
            output.write(key);
            for (CCGJRuleInfo info : list) {
                output.write("  " + info.toString());
            }
            output.write("\n");
        }
        for (String key : rRules.keySet()) {
            ArrayList<CCGJRuleInfo> list = rRules.get(key);
            output.write(key);
            for (CCGJRuleInfo info : list) {
                output.write("  " + info.toString());
            }
            output.write("\n");
        }

        // Word / POS / CCG-cat embeddings, stacked in that order in E.
        for (String word : knownWords) {
            output.write(word);
            for (int k = 0; k < E[index].length; ++k)
                output.write(" " + E[index][k]);
            output.write("\n");
            index = index + 1;
        }
        for (String pos : knownPos) {
            output.write(pos);
            for (int k = 0; k < E[index].length; ++k)
                output.write(" " + E[index][k]);
            output.write("\n");
            index = index + 1;
        }
        for (String label : knownCCGCats) {
            output.write(label);
            for (int k = 0; k < E[index].length; ++k)
                output.write(" " + E[index][k]);
            output.write("\n");
            index = index + 1;
        }

        // Classifier weights, written column-by-column (one line per column).
        for (int j = 0; j < W1[0].length; ++j)
            for (int i = 0; i < W1.length; ++i) {
                output.write("" + W1[i][j]);
                if (i == W1.length - 1)
                    output.write("\n");
                else
                    output.write(" ");
            }
        for (int i = 0; i < b1.length; ++i) {
            output.write("" + b1[i]);
            if (i == b1.length - 1)
                output.write("\n");
            else
                output.write(" ");
        }
        for (int j = 0; j < W2[0].length; ++j)
            for (int i = 0; i < W2.length; ++i) {
                output.write("" + W2[i][j]);
                if (i == W2.length - 1)
                    output.write("\n");
                else
                    output.write(" ");
            }

        // Pre-computed feature ids, 100 per line.
        for (int i = 0; i < preComputed.size(); ++i) {
            output.write("" + preComputed.get(i));
            if ((i + 1) % 100 == 0 || i == preComputed.size() - 1)
                output.write("\n");
            else
                output.write(" ");
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}

From source file:ilcc.ccgparser.test.IncExtractProb.java

/**
 * Loads a serialised CCG parser model from {@code modelFile} and builds
 * the classifier (pre-computing activations afterwards if configured).
 * <p>
 * File layout: eleven "key=value" header lines, then the class actions,
 * the unary/binary/reveal rule tables, the embedding rows (words, POS,
 * CCG cats), W1 column-by-column, b1, W2 column-by-column, and finally
 * the pre-computed feature ids.
 *
 * @param modelFile path of the model file to read
 * @param verbose   currently unused; retained for interface compatibility
 * @throws IOException declared for callers; read failures are rethrown as
 *                     RuntimeIOException
 */
private void loadModelFile(String modelFile, boolean verbose) throws IOException {
    Timing t = new Timing();
    System.err.println("Loading ccg parser model file: " + modelFile + " ... ");
    // try-with-resources guarantees the reader is closed even when a read
    // fails part-way through (it previously leaked on the exception path).
    try (BufferedReader input = IOUtils.readerFromString(modelFile)) {
        String s;

        s = input.readLine();
        int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));

        actsMap = new HashMap<>();
        knownWords = new ArrayList<>();
        knownPos = new ArrayList<>();
        knownCCGCats = new ArrayList<>();

        double[][] E = new double[nDict + nPOS + nccgCat][eSize];
        String[] splits;
        int index = 0;

        // Parser actions: one "ACTION--arity--cat--ruleType" entry per class.
        for (int k = 0; k < classes; k++) {
            s = input.readLine().trim();
            splits = s.split("--");
            actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2],
                    RuleType.valueOf(splits[3])), k);
        }

        // Unary rules: key followed by double-space-separated rule entries.
        for (int k = 0; k < nuRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null,
                        CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]),
                        Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addUnaryRuleInfo(info, key);
            }
        }

        // Binary rules.
        for (int k = 0; k < nbRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addBinaryRuleInfo(info, key);
            }
        }

        // Reveal rules.
        for (int k = 0; k < nrRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addRevealRuleInfo(info, key);
            }
        }

        // Embedding rows: word/POS/CCG-cat entries share E, stacked in
        // that order (index runs over all three sections).
        for (int k = 0; k < nDict; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownWords.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nPOS; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownPos.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nccgCat; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownCCGCats.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        generateIDs();

        // W1 is stored column-by-column (one line per input column).
        double[][] W1 = new double[hSize][eSize * nTokens];
        for (int j = 0; j < W1[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W1.length; ++i)
                W1[i][j] = Double.parseDouble(splits[i]);
        }

        double[] b1 = new double[hSize];
        s = input.readLine();
        splits = s.split(" ");
        for (int i = 0; i < b1.length; ++i)
            b1[i] = Double.parseDouble(splits[i]);

        // W2 is also stored column-by-column.
        double[][] W2 = new double[classes][hSize];
        for (int j = 0; j < W2[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W2.length; ++i)
                W2[i][j] = Double.parseDouble(splits[i]);
        }

        // Pre-computed feature ids, wrapped over several lines.
        preComputed = new ArrayList<Integer>();
        while (preComputed.size() < nPreComputed) {
            s = input.readLine();
            splits = s.split(" ");
            for (String split : splits) {
                preComputed.add(Integer.parseInt(split));
            }
        }
        classifier = new Classifier(config, E, W1, b1, W2, preComputed);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }

    // Pre-compute matrix multiplications for frequent features.
    if (config.numPreComputed > 0) {
        classifier.preCompute();
    }
    t.done("Initializing ccg parser");
}

From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java

License:Open Source License

/**
 * Parses and annotates the next ACE document in the corpus.
 * <p>
 * Scans forward through {@code files} for the next "apf.xml" annotation
 * file, runs the Stanford pipeline over it, collects tokens, parse trees
 * and gold mentions per sentence, and derives predicted mentions (the gold
 * ones when {@code Constants.USE_GOLD_MENTIONS} is set).
 *
 * @return the arranged Document, or null when no annotation files remain
 * @throws Exception if parsing or annotation fails; I/O problems are
 *                   rethrown as RuntimeIOException
 */
public Document nextDoc() throws Exception {
    List<List<CoreLabel>> tokenLists = new ArrayList<List<CoreLabel>>();
    List<List<Mention>> goldMentions = new ArrayList<List<Mention>>();
    List<List<Mention>> predictedMentions;
    List<Tree> parseTrees = new ArrayList<Tree>();

    Annotation anno;

    try {
        // Advance fileIndex to the next ACE annotation file, if any.
        String filename = "";
        while (fileIndex < files.length) {
            String candidate = files[fileIndex];
            fileIndex++;
            if (candidate.contains("apf.xml")) {
                filename = candidate;
                break;
            }
            filename = "";
        }
        if (filename.equals("") && files.length <= fileIndex)
            return null;

        anno = aceReader.parse(corpusPath + filename);
        stanfordProcessor.annotate(anno);

        List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);

        for (CoreMap sentence : sentences) {
            // Re-number tokens within the sentence starting from 1 and
            // default the utterance annotation where it is missing.
            int tokenIndex = 1;
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                token.set(CoreAnnotations.IndexAnnotation.class, tokenIndex++);
                if (!token.containsKey(CoreAnnotations.UtteranceAnnotation.class)) {
                    token.set(CoreAnnotations.UtteranceAnnotation.class, 0);
                }
            }
            parseTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
            tokenLists.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
            extractGoldMentions(sentence, goldMentions, new EntityComparator());
        }

        predictedMentions = Constants.USE_GOLD_MENTIONS ? goldMentions
                : mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);

        printRawDoc(sentences, goldMentions, filename, true);
        printRawDoc(sentences, predictedMentions, filename, false);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }

    return arrange(anno, tokenLists, parseTrees, predictedMentions, goldMentions, true);
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Deserialises a singleton-predictor classifier from {@code serializedFile}.
 *
 * @param serializedFile path (or classpath/URL string) of the serialised model
 * @return the deserialised LogisticClassifier
 * @throws RuntimeIOException   if the file cannot be read
 * @throws ClassCastException   if the stored object is not a LogisticClassifier
 * @throws RuntimeException     if the stored class is not on the classpath
 */
public static LogisticClassifier<String, String> getSingletonPredictorFromSerializedFile(
        String serializedFile) {
    // try-with-resources closes the stream on every path (it was never
    // closed before, leaking the underlying file handle).
    try (ObjectInputStream ois = IOUtils.readStreamFromString(serializedFile)) {
        Object o = ois.readObject();
        if (o instanceof LogisticClassifier<?, ?>) {
            // Erasure prevents checking the type parameters; the cast is
            // safe by the serialisation contract of this predictor.
            @SuppressWarnings("unchecked")
            LogisticClassifier<String, String> classifier = (LogisticClassifier<String, String>) o;
            return classifier;
        }
        throw new ClassCastException("Wanted SingletonPredictor, got " + o.getClass());
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}

From source file:process.PTBTokenizer.java

License:Open Source License

/**
 * Internally fetches the next token from the underlying lexer.
 *
 * @return the next token in the token stream, or null if none exists.
 * @throws RuntimeIOException if the lexer fails with an I/O error
 */
@Override
@SuppressWarnings("unchecked")
protected T getNext() {
    final Object token;
    try {
        token = lexer.next();
    } catch (IOException ioe) {
        // Tokenizer API is unchecked; surface I/O failures as runtime errors.
        throw new RuntimeIOException(ioe);
    }
    return (T) token;
}