Example usage for edu.stanford.nlp.io IOUtils readerFromString

List of usage examples for edu.stanford.nlp.io IOUtils readerFromString

Introduction

On this page you can find example usages of edu.stanford.nlp.io.IOUtils.readerFromString.

Prototype

public static BufferedReader readerFromString(String textFileOrUrl) throws IOException 

Source Link

Document

Open a BufferedReader to a file, class path entry or URL specified by a String name.

Usage

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Loads an embedding file in which every line is a key followed by its
 * whitespace-separated vector components.
 *
 * <p>The dimension is taken from the first line (token count minus one) and must
 * equal {@code config.embeddingSize}. For each line {@code i}, the first token is
 * stored in {@code embedID} mapped to {@code i}, and the remaining tokens fill
 * row {@code i} of the returned matrix.
 *
 * @param embedFile file, class path entry or URL to read; when {@code null} nothing
 *                  is read and {@code null} is returned
 * @param embedID   output map, filled with key -> row index
 * @return the embedding matrix (one row per line), or {@code null} if
 *         {@code embedFile} is {@code null}
 * @throws IllegalArgumentException if the file is empty, its dimension differs from
 *                                  {@code config.embeddingSize}, or a line has too
 *                                  few values
 * @throws RuntimeIOException       if reading the file fails
 */
private double[][] readEmbedFile(String embedFile, Map<String, Integer> embedID) {
    if (embedFile == null) {
        return null;
    }

    double[][] embeddings;
    BufferedReader input = null;
    try {
        input = IOUtils.readerFromString(embedFile);
        List<String> lines = new ArrayList<String>();
        for (String s; (s = input.readLine()) != null;) {
            lines.add(s);
        }

        // Fail with a clear message instead of an IndexOutOfBoundsException on lines.get(0).
        if (lines.isEmpty())
            throw new IllegalArgumentException("Embedding file " + embedFile + " is empty");

        int nWords = lines.size();
        String[] splits = lines.get(0).split("\\s+");

        // The first token of a line is the key; everything after it is the vector.
        int dim = splits.length - 1;
        System.err.println("Embedding File " + embedFile + ": #Words = " + nWords + ", dim = " + dim);

        if (dim != config.embeddingSize)
            throw new IllegalArgumentException(
                    "The dimension of embedding file does not match config.embeddingSize");

        embeddings = new double[nWords][dim];
        for (int i = 0; i < nWords; ++i) {
            splits = lines.get(i).split("\\s+");
            // Validate the row before touching embedID so a malformed line is reported
            // explicitly rather than as an ArrayIndexOutOfBoundsException.
            if (splits.length <= dim)
                throw new IllegalArgumentException("Line " + (i + 1) + " of embedding file " + embedFile
                        + " has " + (splits.length - 1) + " values, expected " + dim);
            embedID.put(splits[0], i);
            for (int j = 0; j < dim; ++j)
                embeddings[i][j] = Double.parseDouble(splits[j + 1]);
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    } finally {
        IOUtils.closeIgnoringExceptions(input);
    }
    return embeddings;
}

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Builds the initial parameters (embedding matrix E, hidden layer W1/b1, output
 * layer W2), optionally seeds them from an embedding file and/or a pre-trained
 * model file, generates the training examples and creates {@code classifier}.
 *
 * @param trainSents training sentences, passed to {@code genTrainExamples}
 * @param trainTrees training trees, passed to {@code genTrainExamples}
 * @param embedFile  embedding file handed to {@code readEmbedFile}; may be {@code null},
 *                   in which case every row of E is randomly initialised
 * @param preModel   pre-trained model file to copy compatible parameters from, or
 *                   {@code null} to skip this step
 * @throws IOException        declared by the signature (IOExceptions raised while reading
 *                            {@code preModel} are rethrown as {@link RuntimeIOException})
 * @throws RuntimeIOException if reading {@code preModel} fails
 */
private void setupClassifierForTraining(List<CCGJSentence> trainSents, List<CCGJTreeNode> trainTrees,
        String embedFile, String preModel) throws IOException {
    double[][] E = new double[knownWords.size() + knownPos.size() + knownCCGCats.size()][config.embeddingSize];
    double[][] W1 = new double[config.hiddenSize][config.embeddingSize * config.numTokens];
    double[] b1 = new double[config.hiddenSize];
    double[][] W2 = new double[actsList.size()][config.hiddenSize];

    // Randomly initialize weight matrices / vectors, uniformly in [-initRange, initRange)
    Random random = Util.getRandom();
    for (int i = 0; i < W1.length; ++i)
        for (int j = 0; j < W1[i].length; ++j)
            W1[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange;

    for (int i = 0; i < b1.length; ++i)
        b1[i] = random.nextDouble() * 2 * config.initRange - config.initRange;

    for (int i = 0; i < W2.length; ++i)
        for (int j = 0; j < W2[i].length; ++j)
            W2[i][j] = random.nextDouble() * 2 * config.initRange - config.initRange;

    // Read embeddings into `embedID`, `embeddings`
    Map<String, Integer> embedID = new HashMap<String, Integer>();
    double[][] embeddings = readEmbedFile(embedFile, embedID);

    // Try to match loaded embeddings with words in dictionary.
    // Only the first knownWords.size() rows of E are looked up; the remaining rows
    // always fall through to the random initialisation below.
    int foundEmbed = 0;
    for (int i = 0; i < E.length; ++i) {
        int index = -1;
        if (i < knownWords.size()) {
            String str = knownWords.get(i);
            //NOTE: exact match first, and then try lower case..
            if (embedID.containsKey(str))
                index = embedID.get(str);
            else if (embedID.containsKey(str.toLowerCase()))
                index = embedID.get(str.toLowerCase());
        }
        if (index >= 0) {
            ++foundEmbed;
            for (int j = 0; j < E[i].length; ++j)
                E[i][j] = embeddings[index][j];
        } else {
            for (int j = 0; j < E[i].length; ++j)
                E[i][j] = random.nextDouble() * 0.02 - 0.01;
        }
    }
    System.err.println("Found embeddings: " + foundEmbed + " / " + knownWords.size());

    if (preModel != null) {
        System.err.println("Loading pre-trained model file: " + preModel + " ... ");
        // try-with-resources: the reader is closed even when parsing fails part-way.
        try (BufferedReader input = IOUtils.readerFromString(preModel)) {
            String s;

            // Header: each line is parsed as the integer that follows its first '='.
            s = input.readLine();
            int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int nLabel = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            s = input.readLine();
            int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1));
            // A seventh header line is read and discarded.
            // NOTE(review): loadModelFile reads eleven header lines plus action/rule
            // sections before the dictionary; confirm preModel really uses this
            // shorter layout.
            s = input.readLine();

            // Embedding rows are only copied for known entries and when the sizes agree;
            // every line is still consumed so the reader stays aligned.
            String[] splits;
            for (int k = 0; k < nDict; ++k) {
                s = input.readLine();
                splits = s.split(" ");
                if (wordIDs.containsKey(splits[0]) && eSize == config.embeddingSize) {
                    int index = getWordID(splits[0]);
                    for (int i = 0; i < eSize; ++i)
                        E[index][i] = Double.parseDouble(splits[i + 1]);
                }
            }

            for (int k = 0; k < nPOS; ++k) {
                s = input.readLine();
                splits = s.split(" ");
                if (posIDs.containsKey(splits[0]) && eSize == config.embeddingSize) {
                    int index = getPosID(splits[0]);
                    for (int i = 0; i < eSize; ++i)
                        E[index][i] = Double.parseDouble(splits[i + 1]);
                }
            }

            for (int k = 0; k < nLabel; ++k) {
                s = input.readLine();
                splits = s.split(" ");
                if (ccgcatIDs.containsKey(splits[0]) && eSize == config.embeddingSize) {
                    int index = getCCGCatID(splits[0]);
                    for (int i = 0; i < eSize; ++i)
                        E[index][i] = Double.parseDouble(splits[i + 1]);
                }
            }

            boolean copyLayer1 = hSize == config.hiddenSize && config.embeddingSize == eSize
                    && config.numTokens == nTokens;
            if (copyLayer1) {
                System.err.println("Copying parameters W1 && b1...");
            }
            // Each line j holds one column of W1 (hSize values).
            for (int j = 0; j < eSize * nTokens; ++j) {
                s = input.readLine();
                if (copyLayer1) {
                    splits = s.split(" ");
                    for (int i = 0; i < hSize; ++i)
                        W1[i][j] = Double.parseDouble(splits[i]);
                }
            }

            s = input.readLine();
            if (copyLayer1) {
                splits = s.split(" ");
                for (int i = 0; i < hSize; ++i)
                    b1[i] = Double.parseDouble(splits[i]);
            }

            boolean copyLayer2 = (nLabel * 2 - 1 == actsList.size()) && hSize == config.hiddenSize;
            if (copyLayer2)
                System.err.println("Copying parameters W2...");
            // Each line j holds one column of W2.
            for (int j = 0; j < hSize; ++j) {
                s = input.readLine();
                if (copyLayer2) {
                    splits = s.split(" ");
                    for (int i = 0; i < nLabel * 2 - 1; ++i)
                        W2[i][j] = Double.parseDouble(splits[i]);
                }
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }
    Dataset trainSet = genTrainExamples(trainSents, trainTrees);
    classifier = new Classifier(config, trainSet, E, W1, b1, W2, preComputed);
}

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Loads a ccg parser model from a text file and rebuilds the parser state:
 * {@code actsMap}, the known word / POS / CCG category lists, the treebank rules of
 * a fresh {@code NonInc} parser, the pre-computed id list and the {@code classifier}.
 *
 * <p>The file is read strictly sequentially: eleven header lines (each parsed as the
 * integer after its first '='), then the actions, unary / binary / reveal rules,
 * embedding rows, W1 columns, b1, W2 columns and finally the pre-computed ids.
 *
 * @param modelFile file, class path entry or URL of the model
 * @param verbose   currently unused by this method
 * @throws IOException        declared by the signature (IOExceptions raised while reading
 *                            are rethrown as {@link RuntimeIOException})
 * @throws RuntimeIOException if reading the model fails
 */
private void loadModelFile(String modelFile, boolean verbose) throws IOException {
    Timing t = new Timing();

    System.err.println("Loading ccg parser model file: " + modelFile + " ... ");
    // try-with-resources: the reader is closed even when parsing fails part-way.
    try (BufferedReader input = IOUtils.readerFromString(modelFile)) {
        String s;

        s = input.readLine();
        int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));

        actsMap = new HashMap<>();
        knownWords = new ArrayList<>();
        knownPos = new ArrayList<>();
        knownCCGCats = new ArrayList<>();
        srparser = new NonInc();

        double[][] E = new double[nDict + nPOS + nccgCat][eSize];
        String[] splits;
        int index = 0;

        // Actions: four "--"-separated fields per line; the line number becomes the class id.
        for (int k = 0; k < classes; k++) {
            s = input.readLine().trim();
            splits = s.split("--");
            actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2],
                    RuleType.valueOf(splits[3])), k);
        }

        // Rule lines: a key followed by double-space-separated rule entries, each entry
        // being "--"-separated fields. Unary rules skip parts[1] and pass null instead.
        for (int k = 0; k < nuRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null,
                        CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]),
                        Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addUnaryRuleInfo(info, key);
            }
        }

        for (int k = 0; k < nbRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addBinaryRuleInfo(info, key);
            }
        }

        for (int k = 0; k < nrRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addRevealRuleInfo(info, key);
            }
        }

        // Embedding rows: words first, then POS tags, then CCG categories, sharing one
        // running row index into E.
        for (int k = 0; k < nDict; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownWords.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nPOS; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownPos.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nccgCat; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownCCGCats.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        generateIDs();

        // W1 is stored column by column: line j holds W1[0..hSize-1][j].
        double[][] W1 = new double[hSize][eSize * nTokens];
        for (int j = 0; j < W1[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W1.length; ++i)
                W1[i][j] = Double.parseDouble(splits[i]);
        }

        double[] b1 = new double[hSize];
        s = input.readLine();
        splits = s.split(" ");
        for (int i = 0; i < b1.length; ++i)
            b1[i] = Double.parseDouble(splits[i]);

        // W2 is stored column by column as well.
        double[][] W2 = new double[classes][hSize];
        for (int j = 0; j < W2[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W2.length; ++i)
                W2[i][j] = Double.parseDouble(splits[i]);
        }

        // Pre-computed ids may span several lines; keep reading until enough are collected.
        preComputed = new ArrayList<Integer>();
        while (preComputed.size() < nPreComputed) {
            s = input.readLine();
            splits = s.split(" ");
            for (String split : splits) {
                preComputed.add(Integer.parseInt(split));
            }
        }
        classifier = new Classifier(config, E, W1, b1, W2, preComputed);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }

    // initialize the loaded parser

    // Pre-compute matrix multiplications
    if (config.numPreComputed > 0) {
        classifier.preCompute();
    }
    t.done("Initializing ccg parser");
}

From source file:ilcc.ccgparser.test.IncExtractProb.java

/**
 * Loads a ccg parser model from a text file and rebuilds {@code actsMap}, the known
 * word / POS / CCG category lists, the treebank rules of {@code srparser}, the
 * pre-computed id list and the {@code classifier}.
 *
 * <p>The file is read strictly sequentially: eleven header lines (each parsed as the
 * integer after its first '='), then the actions, unary / binary / reveal rules,
 * embedding rows, W1 columns, b1, W2 columns and finally the pre-computed ids.
 *
 * @param modelFile file, class path entry or URL of the model
 * @param verbose   currently unused by this method
 * @throws IOException        declared by the signature (IOExceptions raised while reading
 *                            are rethrown as {@link RuntimeIOException})
 * @throws RuntimeIOException if reading the model fails
 */
private void loadModelFile(String modelFile, boolean verbose) throws IOException {
    Timing t = new Timing();

    System.err.println("Loading ccg parser model file: " + modelFile + " ... ");
    // try-with-resources: the reader is closed even when parsing fails part-way.
    try (BufferedReader input = IOUtils.readerFromString(modelFile)) {
        String s;

        s = input.readLine();
        int nDict = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPOS = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nccgCat = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int eSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int hSize = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nTokens = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nPreComputed = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int classes = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nuRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nbRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));
        s = input.readLine();
        int nrRules = Integer.parseInt(s.substring(s.indexOf('=') + 1));

        actsMap = new HashMap<>();
        knownWords = new ArrayList<>();
        knownPos = new ArrayList<>();
        knownCCGCats = new ArrayList<>();
        // NOTE(review): srparser is not assigned in this method, so it must already be
        // initialised before the rule sections below use it — confirm against the caller.

        double[][] E = new double[nDict + nPOS + nccgCat][eSize];
        String[] splits;
        int index = 0;

        // Actions: four "--"-separated fields per line; the line number becomes the class id.
        for (int k = 0; k < classes; k++) {
            s = input.readLine().trim();
            splits = s.split("--");
            actsMap.put(ArcJAction.make(SRAction.valueOf(splits[0]), Integer.parseInt(splits[1]), splits[2],
                    RuleType.valueOf(splits[3])), k);
        }

        // Rule lines: a key followed by double-space-separated rule entries, each entry
        // being "--"-separated fields. Unary rules skip parts[1] and pass null instead.
        for (int k = 0; k < nuRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]), null,
                        CCGcat.ccgCatFromString(parts[2]), parts[3].equals("true"), RuleType.valueOf(parts[4]),
                        Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addUnaryRuleInfo(info, key);
            }
        }

        for (int k = 0; k < nbRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addBinaryRuleInfo(info, key);
            }
        }

        for (int k = 0; k < nrRules; k++) {
            s = input.readLine().trim();
            splits = s.split("  ");
            String key = splits[0];
            for (int i = 1; i < splits.length; i++) {
                String[] parts = splits[i].split("--");
                CCGJRuleInfo info = new CCGJRuleInfo(CCGcat.ccgCatFromString(parts[0]),
                        CCGcat.ccgCatFromString(parts[1]), CCGcat.ccgCatFromString(parts[2]),
                        parts[3].equals("true"), RuleType.valueOf(parts[4]), Integer.parseInt(parts[5]), 0);
                srparser.treebankRules.addRevealRuleInfo(info, key);
            }
        }

        // Embedding rows: words first, then POS tags, then CCG categories, sharing one
        // running row index into E.
        for (int k = 0; k < nDict; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownWords.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nPOS; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownPos.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        for (int k = 0; k < nccgCat; ++k) {
            s = input.readLine();
            splits = s.split(" ");
            knownCCGCats.add(splits[0]);
            for (int i = 0; i < eSize; ++i)
                E[index][i] = Double.parseDouble(splits[i + 1]);
            index = index + 1;
        }
        generateIDs();

        // W1 is stored column by column: line j holds W1[0..hSize-1][j].
        double[][] W1 = new double[hSize][eSize * nTokens];
        for (int j = 0; j < W1[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W1.length; ++i)
                W1[i][j] = Double.parseDouble(splits[i]);
        }

        double[] b1 = new double[hSize];
        s = input.readLine();
        splits = s.split(" ");
        for (int i = 0; i < b1.length; ++i)
            b1[i] = Double.parseDouble(splits[i]);

        // W2 is stored column by column as well.
        double[][] W2 = new double[classes][hSize];
        for (int j = 0; j < W2[0].length; ++j) {
            s = input.readLine();
            splits = s.split(" ");
            for (int i = 0; i < W2.length; ++i)
                W2[i][j] = Double.parseDouble(splits[i]);
        }

        // Pre-computed ids may span several lines; keep reading until enough are collected.
        preComputed = new ArrayList<Integer>();
        while (preComputed.size() < nPreComputed) {
            s = input.readLine();
            splits = s.split(" ");
            for (String split : splits) {
                preComputed.add(Integer.parseInt(split));
            }
        }
        classifier = new Classifier(config, E, W1, b1, W2, preComputed);
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }

    // initialize the loaded parser

    // Pre-compute matrix multiplications
    if (config.numPreComputed > 0) {
        classifier.preCompute();
    }
    t.done("Initializing ccg parser");
}