Example usage for edu.stanford.nlp.util StringUtils argsToProperties

List of usage examples for edu.stanford.nlp.util StringUtils argsToProperties

Introduction

On this page you can find example usage for edu.stanford.nlp.util StringUtils argsToProperties.

Prototype

public static Properties argsToProperties(String... args) 

Source Link

Document

In this version each flag has zero or one argument.

Usage

From source file:DependencyParse.java

License:Apache License

/**
 * Reads sentences from stdin, POS-tags and dependency-parses each one, and
 * writes three line-aligned files: tokens, parent indices, and relation labels.
 *
 * <p>Required flags: {@code -tokpath}, {@code -parentpath}, {@code -relpath}.
 * Optional flag {@code -tokenize} runs the PTB tokenizer instead of splitting
 * on single spaces.
 *
 * @param args command-line flags parsed by {@code StringUtils.argsToProperties}
 * @throws Exception if model loading or file I/O fails
 */
public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    if (!props.containsKey("tokpath") || !props.containsKey("parentpath") || !props.containsKey("relpath")) {
        System.err.println(
                "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>");
        System.exit(1);
    }

    // -tokenize is a presence flag: run the PTB tokenizer on each input line.
    boolean tokenize = props.containsKey("tokenize");

    String tokPath = props.getProperty("tokpath");
    String parentPath = props.getProperty("parentpath");
    String relPath = props.getProperty("relpath");

    // try-with-resources guarantees the writers are flushed and closed even if
    // tagging/parsing throws (the original leaked all three on any exception).
    try (BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath));
            BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath));
            BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath))) {

        MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL);
        DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL);
        Scanner stdin = new Scanner(System.in);
        int count = 0;
        long start = System.currentTimeMillis();
        while (stdin.hasNextLine()) {
            String line = stdin.nextLine();
            List<HasWord> tokens = new ArrayList<>();
            if (tokenize) {
                PTBTokenizer<Word> tokenizer =
                        new PTBTokenizer<>(new StringReader(line), new WordTokenFactory(), "");
                // (original declared an unused "Word label" loop variable here)
                while (tokenizer.hasNext()) {
                    tokens.add(tokenizer.next());
                }
            } else {
                for (String word : line.split(" ")) {
                    tokens.add(new Word(word));
                }
            }

            List<TaggedWord> tagged = tagger.tagSentence(tokens);
            int len = tagged.size();

            // An empty input line yields zero tokens; emit blank output lines so
            // the three files stay line-aligned with the input (the original
            // crashed here with ArrayIndexOutOfBoundsException on len - 1).
            if (len == 0) {
                tokWriter.write("\n");
                parentWriter.write("\n");
                relWriter.write("\n");
                count++;
                continue;
            }

            Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies();

            // parents[i] == -1 means token i received no governor from the parser.
            int[] parents = new int[len];
            for (int i = 0; i < len; i++) {
                parents[i] = -1;
            }

            // relns[i] stays null (and prints as "null") for tokens with no
            // incoming dependency — same as the original behavior.
            String[] relns = new String[len];
            for (TypedDependency td : tdl) {
                // Stanford dependency indices are 1-based; the root has index 0.
                int child = td.dep().index();
                int parent = td.gov().index();
                relns[child - 1] = td.reln().toString();
                parents[child - 1] = parent;
            }

            // print tokens, space-separated, one sentence per line
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < len; i++) {
                if (i > 0) {
                    sb.append(' ');
                }
                sb.append(tokenize ? PTBTokenizer.ptbToken2Text(tokens.get(i).word())
                        : tokens.get(i).word());
            }
            sb.append('\n');
            tokWriter.write(sb.toString());

            // print parent pointers
            sb = new StringBuilder();
            for (int i = 0; i < len; i++) {
                if (i > 0) {
                    sb.append(' ');
                }
                sb.append(parents[i]);
            }
            sb.append('\n');
            parentWriter.write(sb.toString());

            // print relation labels
            sb = new StringBuilder();
            for (int i = 0; i < len; i++) {
                if (i > 0) {
                    sb.append(' ');
                }
                sb.append(relns[i]);
            }
            sb.append('\n');
            relWriter.write(sb.toString());

            count++;
            if (count % 1000 == 0) {
                double elapsed = (System.currentTimeMillis() - start) / 1000.0;
                System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
            }
        }

        long totalTimeMillis = System.currentTimeMillis() - start;
        System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0,
                totalTimeMillis / (double) count);
    }
}

From source file:ConstituencyParse.java

License:Apache License

/**
 * Reads sentences from stdin, constituency-parses each one, and writes
 * parent-pointer trees (and optionally tokens) through a
 * {@code ConstituencyParse} processor.
 *
 * <p>Required flag: {@code -parentpath}. Optional: {@code -tokpath}
 * (token output file), {@code -tokenize} (run tokenizer), {@code -deps}
 * (convert the constituency parse to a dependency tree first).
 *
 * @param args command-line flags parsed by {@code StringUtils.argsToProperties}
 * @throws Exception if parsing or output fails
 */
public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    if (!props.containsKey("parentpath")) {
        System.err.println(
                "usage: java ConstituencyParse -deps - -tokenize - -tokpath <tokpath> -parentpath <parentpath>");
        System.exit(1);
    }

    // whether to tokenize input sentences (presence flag)
    boolean tokenize = props.containsKey("tokenize");

    // whether to produce dependency trees from the constituency parse
    boolean deps = props.containsKey("deps");

    // token output is optional; a null path disables printTokens below
    // (Properties values are never null, so getProperty == the original
    // containsKey ? getProperty : null dance)
    String tokPath = props.getProperty("tokpath");
    String parentPath = props.getProperty("parentpath");
    ConstituencyParse processor = new ConstituencyParse(tokPath, parentPath, tokenize);

    // close the processor even if an iteration throws (the original skipped
    // close() on any exception, risking unflushed output)
    try {
        Scanner stdin = new Scanner(System.in);
        int count = 0;
        long start = System.currentTimeMillis();
        while (stdin.hasNextLine()) {
            String line = stdin.nextLine();
            List<HasWord> tokens = processor.sentenceToTokens(line);
            Tree parse = processor.parse(tokens);

            // produce parent pointer representation
            int[] parents = deps ? processor.depTreeParents(parse, tokens) : processor.constTreeParents(parse);

            // print
            if (tokPath != null) {
                processor.printTokens(tokens);
            }
            processor.printParents(parents);

            count++;
            if (count % 1000 == 0) {
                double elapsed = (System.currentTimeMillis() - start) / 1000.0;
                System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
            }
        }

        long totalTimeMillis = System.currentTimeMillis() - start;
        System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0,
                totalTimeMillis / (double) count);
    } finally {
        processor.close();
    }
}

From source file:com.koalephant.nlp.RegexNERValidator.java

License:Open Source License

/**
 * Main method/* w ww .j a  va2  s  . co  m*/
 *
 * @param args program arguments, mostly -properties
 * @throws RuntimeException
 */
public static void main(String args[]) throws RuntimeException {

    if (args.length == 0) {
        throw new RuntimeException("No arguments specified");
    }

    Properties properties = StringUtils.argsToProperties(args);

    if (properties.containsKey("inFile")) {
        if (properties.containsKey("outFile")) {
            quoteMappings(properties.getProperty("inFile"), properties.getProperty("outFile"));
        } else {
            quoteMappings(properties.getProperty("inFile"));
        }
    } else {
        testMappings(properties);
    }
}

From source file:com.koalephant.nlp.StanfordCoreNLPHTTPServer.java

License:Open Source License

/**
 * Start the Server/*from w w  w  .jav  a 2  s .  c o  m*/
 *
 * @param args command line arguments
 * @throws Exception
 */
public static void main(String args[]) throws Exception {
    Properties props = new Properties();
    // Load properties from the command line
    if (args.length > 0) {
        log.info("Reading Opts...");
        props = StringUtils.argsToProperties(args);
    }

    // use -host if given
    host = props.getProperty("host", host);
    props.remove("host");

    String portOpt = props.getProperty("port");
    // use -port if given
    if (portOpt != null) {
        try {
            props.remove("port");
            port = Integer.parseInt(portOpt);
        } catch (Exception e) {
            System.err.println("Invalid port specified: " + portOpt);
            System.exit(1);
        }
    }

    String defaultTypeStr = props.getProperty("defaultType");
    if (defaultTypeStr != null) {
        try {
            defaultType = MediaType.getFromType(defaultTypeStr, defaultType);
        } catch (IllegalArgumentException e) {
            System.err.println(e.getMessage());
            System.exit(1);
        }
    }

    // start the server
    Container container = new StanfordCoreNLPHTTPServer(new StanfordCoreNLP(props));

    log.info("Attempting to listen on " + host + ":" + port + ".");

    Server server = new ContainerServer(container);
    Connection connection = new SocketConnection(server);
    SocketAddress address = new InetSocketAddress(host, port);
    connection.connect(address);

    log.info("Initialized server at " + host + ":" + port + ".");
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordPosTagger.java

License:Open Source License

@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    // Lazily resolves and loads the MaxentTagger model for the configured
    // language/variant; an explicit modelLocation parameter overrides the
    // classpath default below.
    modelProvider = new ModelProviderBase<MaxentTagger>() {
        {
            setContextObject(StanfordPosTagger.this);

            setDefault(ARTIFACT_ID, "${groupId}.stanfordnlp-model-tagger-${language}-${variant}");
            setDefault(LOCATION, "classpath:/${package}/lib/tagger-${language}-${variant}.properties");
            setDefaultVariantsLocation("${package}/lib/tagger-default-variants.map");

            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        @Override
        protected MaxentTagger produceResource(URL aUrl) throws IOException {
            String modelFile = aUrl.toString();
            // "-model" is handed to the tagger via a Properties object;
            // NOTE(review): assumed the trailing false suppresses verbose
            // loading output — confirm against the
            // MaxentTagger(String, Properties, boolean) constructor.
            MaxentTagger tagger = new MaxentTagger(modelFile,
                    StringUtils.argsToProperties(new String[] { "-model", modelFile }), false);

            // Expose the model's POS tag set to DKPro, named after the model's
            // "pos.tagset" metadata property.
            SingletonTagset tags = new SingletonTagset(POS.class,
                    getResourceMetaData().getProperty(("pos.tagset")));
            tags.addAll(tagger.tagSet());
            addTagset(tags);

            if (printTagSet) {
                getContext().getLogger().log(INFO, getTagset().toString());
            }

            return tagger;
        }
    };

    // Maps model-specific POS tags onto the DKPro Core POS type system.
    posMappingProvider = MappingProviderFactory.createPosMappingProvider(posMappingLocation, language,
            modelProvider);
    posMappingProvider.setDefaultVariantsLocation(
            "de/tudarmstadt/ukp/dkpro/core/stanfordnlp/lib/tagger-default-variants.map");
}

From source file:edu.cmu.geolocator.nlp.ner.StanfordCRF.CRF_Learn.java

License:Apache License

/**
 * Trains (or loads) a Stanford CRF classifier, then optionally serializes it
 * and runs it over any test/text files named in the flags.
 *
 * <p>NOTE(review): the {@code argc} command-line arguments are deliberately
 * ignored — the properties path is hard-coded to the bundled train.prop.
 * All the flag fields read below come from that file.
 */
public static void main(String[] argc) throws Exception {
    //StringUtils.printErrInvocationString("CRFClassifier", args);

    // Hard-coded invocation: always read flags from the bundled training
    // properties file, regardless of what was passed on the command line.
    String[] args = new String[2];
    args[0] = "-prop";
    args[1] = "src/edu/cmu/geoparser/nlptools/ner/StanfordCRF/train.prop";

    Properties props = StringUtils.argsToProperties(args);
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props);
    // Snapshot the flags that decide what happens after training/loading.
    String testFile = crf.flags.testFile;
    String testFiles = crf.flags.testFiles;
    String textFile = crf.flags.textFile;
    String textFiles = crf.flags.textFiles;
    String loadPath = crf.flags.loadClassifier;
    String loadTextPath = crf.flags.loadTextClassifier;
    String serializeTo = crf.flags.serializeTo;
    String serializeToText = crf.flags.serializeToText;

    // Obtain a classifier: load a serialized one, load a text one, load from
    // a jar, train from scratch, or fall back to the default model — in that
    // priority order.
    if (loadPath != null) {
        crf.loadClassifierNoExceptions(loadPath, props);
    } else if (loadTextPath != null) {
        System.err.println("Warning: this is now only tested for Chinese Segmenter");
        System.err.println("(Sun Dec 23 00:59:39 2007) (pichuan)");
        try {
            crf.loadTextClassifier(loadTextPath, props);
            // System.err.println("DEBUG: out from crf.loadTextClassifier");
        } catch (Exception e) {
            throw new RuntimeException("error loading " + loadTextPath, e);
        }
    } else if (crf.flags.loadJarClassifier != null) {
        crf.loadJarClassifier(crf.flags.loadJarClassifier, props);
    } else if (crf.flags.trainFile != null || crf.flags.trainFileList != null) {

        //Wei Zhang: This is where the program starts to train.
        crf.train();
        ////////////

    } else {
        crf.loadDefaultClassifier();
    }

    // System.err.println("Using " + crf.flags.featureFactory);
    // System.err.println("Using " +
    // StringUtils.getShortClassName(crf.readerAndWriter));

    // Persist the trained model in binary and/or text form, if requested.
    if (serializeTo != null) {

        //Wei Zhang: This is used.
        crf.serializeClassifier(serializeTo);
        /////////////////////////
    }

    if (serializeToText != null) {
        crf.serializeTextClassifier(serializeToText);
    }

    // Evaluate/label a single test file; the sub-flags pick the output mode
    // (Viterbi search graph, probabilities, k-best, label info, or answers).
    if (testFile != null) {
        DocumentReaderAndWriter<CoreLabel> readerAndWriter = crf.defaultReaderAndWriter();
        if (crf.flags.searchGraphPrefix != null) {
            crf.classifyAndWriteViterbiSearchGraph(testFile, crf.flags.searchGraphPrefix,
                    crf.makeReaderAndWriter());
        } else if (crf.flags.printFirstOrderProbs) {
            crf.printFirstOrderProbs(testFile, readerAndWriter);
        } else if (crf.flags.printProbs) {
            crf.printProbs(testFile, readerAndWriter);
        } else if (crf.flags.useKBest) {
            int k = crf.flags.kBest;
            crf.classifyAndWriteAnswersKBest(testFile, k, readerAndWriter);
        } else if (crf.flags.printLabelValue) {
            crf.printLabelInformation(testFile, readerAndWriter);
        } else {
            crf.classifyAndWriteAnswers(testFile, readerAndWriter);
        }
    }

    // Label a comma-separated list of test files in one pass.
    if (testFiles != null) {
        List<File> files = new ArrayList<File>();
        for (String filename : testFiles.split(",")) {
            files.add(new File(filename));
        }
        crf.classifyAndWriteAnswers(files, crf.defaultReaderAndWriter());
    }

    // Label plain-text input (single file, then comma-separated list).
    if (textFile != null) {
        crf.classifyAndWriteAnswers(textFile);
    }

    if (textFiles != null) {
        List<File> files = new ArrayList<File>();
        for (String filename : textFiles.split(",")) {
            files.add(new File(filename));
        }
        crf.classifyAndWriteAnswers(files);
    }

    // Finally, label stdin interactively if requested.
    if (crf.flags.readStdin) {
        crf.classifyStdin();
    }
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Needs the following properties: -props 'Location of coref.properties'
 *
 * @throws Exception/*from ww  w  .ja  va2 s .c  om*/
 */
public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    initializeAndRunCoref(props);
}

From source file:lv.lumii.ner.analysis.ClassifierComparator.java

License:Open Source License

/**
 * Compares several NER classifier configurations on one CONLL input file.
 *
 * <p>Recognised properties: {@code -inputFile}, grouped
 * {@code -classifier0} .. {@code -classifier9} specs, plus stand-alone
 * {@code -whiteList}, {@code -loadClassifier} and {@code -regexList}
 * ({@code ;}-separated lists).
 */
public static void main(String[] args) throws ClassCastException, ClassNotFoundException, IOException {
    // argsToProperties builds the Properties directly; the original also
    // created a throwaway empty Properties and an unused propString local.
    Properties props = StringUtils.argsToProperties(args);
    inputFile = props.getProperty("inputFile", null);
    System.err.println(props);
    ClassifierComparator cc = new ClassifierComparator(props);

    // classifierN properties: each value is a '|'-separated list of
    // "kind = location" items; all items of one property are added to the
    // comparator together as a single group.
    // Plain int loop variable — the original boxed an Integer each iteration.
    for (int i = 0; i < 10; i++) {
        String key = "classifier" + i;
        if (props.containsKey(key)) {
            System.err.println(key);
            String[] parts = props.getProperty(key).split("\\s*\\|\\s*");
            List<AbstractSequenceClassifier<CoreLabel>> nccl = new ArrayList<>();
            for (String part : parts) {
                System.err.println("\t" + part);
                String[] items = part.split("\\s*=\\s*");
                // (sic: the original prints "part" twice; kept for identical
                // debug output)
                System.err.println("\t" + part);
                if (items[0].equalsIgnoreCase("whiteList")) {
                    System.err.println("\twhiteList" + items[1]);
                    nccl.add(new ListNERSequenceClassifier(items[1], true, true, true));
                }
                if (items[0].equalsIgnoreCase("loadClassifier")) {
                    System.err.println("\tloadClassifier" + items[1]);
                    nccl.add(CRFClassifier.getClassifier(items[1], props));
                }
                if (items[0].equalsIgnoreCase("regexList")) {
                    // (sic: "regexListt" typo kept from original debug output)
                    System.err.println("\tregexListt" + items[1]);
                    nccl.add(new RegexNERSequenceClassifier(items[1], true, true));
                }
            }
            cc.addClassifier(nccl);
        }
    }

    // Stand-alone classifiers: each ';'-separated entry is added individually.
    if (props.containsKey("whiteList")) {
        for (String whiteList : props.getProperty("whiteList").split(";")) {
            cc.addClassifier(new ListNERSequenceClassifier(whiteList.trim(), true, true, true));
        }
    }

    if (props.containsKey("loadClassifier")) {
        for (String loadClassifier : props.getProperty("loadClassifier").split(";")) {
            cc.addClassifier(CRFClassifier.getClassifier(loadClassifier.trim(), props));
        }
    }

    if (props.containsKey("regexList")) {
        for (String regexList : props.getProperty("regexList").split(";")) {
            cc.addClassifier(new RegexNERSequenceClassifier(regexList.trim(), true, true));
        }
    }

    // Read the document, classify it with every registered configuration,
    // and print comparison statistics.
    LVCoNLLDocumentReaderAndWriter reader = new LVCoNLLDocumentReaderAndWriter();
    List<CoreLabel> doc = reader.readCONLL(inputFile);

    cc.classify(doc);

    reader.outputType = LVCoNLLDocumentReaderAndWriter.outputTypes.COMPARE;
    cc.entity_stats().preview();
}

From source file:lv.lumii.ner.NerPipe.java

License:Open Source License

/**
 * Runs the LV named-entity recognition pipeline over CONLL input on stdin.
 * {@code -h}, {@code -help} or {@code -?} prints usage and exits.
 */
public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException {
    // argsToProperties builds the Properties directly; the original created a
    // throwaway empty Properties first, then overwrote the reference.
    Properties props = StringUtils.argsToProperties(args);
    if (props.containsKey("h") || props.containsKey("help") || props.containsKey("?")) {
        System.out.println("LV Named Entity Recogniser");
        System.out.println("\nInput formats");
        System.out.println("\tDefault : conll-in");
        System.out.println(
                "\t-conll-in : CONLL shared task data format - one line per token, with tab-delimited columns, sentences separated by blank lines.");
        System.out.println("\nOutput formats");
        System.out.println("\tDefault : conll-x");
        System.out.println(
                "\t-conll-x : CONLL-X shared task data format - one line per token, with tab-delimited columns, sentences separated by blank lines.");
        // (original had a stray empty statement ';' here)
        System.out.println("\t-simple : Simple compare format used for ner analysis");
        System.out.println("\t-toFeatures : add ner key and value to morphoFeature string");
        System.out.println("\nOther options:");
        System.out.println("\t-saveExtraColumns : save extra columns after typical conll input (6 columns)");
        System.out.println("\t-whiteList : files containing white list named entities (separated by comma)");
        System.out.flush();
        System.exit(0);
    }

    NerPipe ner = new NerPipe(props);
    ner.classifyDocumentStdin();
}

From source file:LVCoref.LVCoref.java

License:Open Source License

/**
 * Command-line entry point for the LVCoref coreference resolver.
 *
 * <p>Mode is selected by the properties loaded into {@code Constants} and
 * {@code inputType}: score a directory, annotate a folder, or process
 * documents from a CONLL file, JSON-over-stdin, or CONLL-over-stdin.
 */
public static void main(String[] args) throws Exception {
    Properties properties = StringUtils.argsToProperties(args);
    initializeProperties(properties);
    // Scoring mode: evaluate existing output, then exit.
    if (Constants.SCORE) {
        scoreDirectory(Constants.SCORE_PATH);
        return;
    }
    // Batch mode: annotate every document in a folder, then exit.
    if (Constants.TAG_FOLDER) {
        annotateFolder(Constants.TAG_FOLDER_PATH, Constants.TAG_FOLDER_OUT);
        return;
    }

    int docID = 0; // document counter
    BufferedReader in;
    switch (inputType) {
    case CONLL:
        // Single CONLL file named in the configuration.
        Document d = new Document(dictionaries);
        try {
            d.readCONLL(conllInput);
        } catch (Exception ex) {
            log.severe("Could not read conll file");
            ex.printStackTrace();
            break;
        }
        // Only process documents that actually contain parsed tokens.
        if (d.tree.size() > 0)
            processDocument(d);
        break;
    case STDIN_JSON:
        // Stream of JSON documents on stdin; stop at the first empty or
        // unreadable document.
        in = new BufferedReader(new InputStreamReader(System.in, "UTF8"));
        while (true) {
            Document doc = new Document(dictionaries);
            doc.id = Integer.toString(docID++);
            try {
                doc.readJSON(in);
            } catch (Exception ex) {
                log.severe("Could not read json from stream");
                ex.printStackTrace();
                break;
            }
            if (doc.tree.size() > 0)
                processDocument(doc);
            else
                break;
        }
        break;
    default:
        // Default: stream of CONLL documents on stdin, same termination rule
        // as the JSON branch.
        in = new BufferedReader(new InputStreamReader(System.in, "UTF8"));
        while (true) {
            Document doc = new Document(dictionaries);
            doc.id = Integer.toString(docID++);
            try {
                doc.readCONLL(in);
            } catch (Exception ex) {
                log.severe("Could not read conll from stream");
                ex.printStackTrace();
                break;
            }
            if (doc.tree.size() > 0)
                processDocument(doc);
            else
                break;
        }
    }
}