Example usage for edu.stanford.nlp.util StringUtils join

Introduction

On this page you can find example usages of edu.stanford.nlp.util StringUtils join.

Prototype

public static String join(String[] items, String glue) 

Document

Joins each element in the given array with the given glue.
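
A minimal, self-contained sketch of the prototype above (the array contents are purely illustrative):

import edu.stanford.nlp.util.StringUtils;

public class JoinExample {
    public static void main(String[] args) {
        // join concatenates the items, inserting the glue between consecutive elements
        String[] items = { "alpha", "beta", "gamma" };
        System.out.println(StringUtils.join(items, ", ")); // prints: alpha, beta, gamma
    }
}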

Usage

From source file:com.romeikat.datamessie.core.processing.service.fulltext.index.IndexBuilder.java

License:Open Source License

private String getClassNames() {
    final List<String> classNames = Lists.newLinkedList();
    for (final Class<?> classToIndex : classesToIndex) {
        final String className = classToIndex.getSimpleName();
        classNames.add(className);
    }
    return StringUtils.join(classNames, ", ");
}
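
If classesToIndex held, say, two classes named Article and Source (hypothetical names), the returned string would be "Article, Source".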

From source file:com.romeikat.datamessie.core.processing.service.fulltext.query.OutOfQuery.java

License:Open Source License

@Override
public String toString() {
    final StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.append("OutOfQuery: ");
    stringBuilder.append("k = ");
    stringBuilder.append(k);
    stringBuilder.append(", queryTerms = ");
    stringBuilder.append(StringUtils.join(queryTerms, " "));
    return stringBuilder.toString();
}
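
For a hypothetical instance with k = 2 and query terms "foo" and "bar", this toString() would return "OutOfQuery: k = 2, queryTerms = foo bar".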

From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.Pipeline.java

License:Open Source License

public static void main(String[] args) throws Exception {
    DATASET_DIR = DKProContext.getContext().getWorkspace().getAbsolutePath() + "/Datasets/###/ds";
    GOLDSTANDARD_DIR = DKProContext.getContext().getWorkspace().getAbsolutePath() + "/Datasets/###/gs";

    Options options = new Options();
    options.addOption("d", "dataset", true, "dataset to evaluate: " + StringUtils.join(Dataset.values(), ", "));
    options.addOption("c", "classifier", true,
            "classifier to use: " + StringUtils.join(WekaClassifier.values(), ", "));

    CommandLineParser parser = new PosixParser();
    try {
        CommandLine cmd = parser.parse(options, args);

        if (cmd.hasOption("d") && cmd.hasOption("c")) {
            Dataset dataset = Dataset.valueOf(cmd.getOptionValue("d"));
            WekaClassifier wekaClassifier = WekaClassifier.valueOf(cmd.getOptionValue("c"));

            runCV(dataset, wekaClassifier);
        } else {
            new HelpFormatter().printHelp(Pipeline.class.getSimpleName(), options, true);
        }
    } catch (ParseException e) {
        new HelpFormatter().printHelp(Pipeline.class.getSimpleName(), options);
    }
}

From source file:eu.eexcess.diversityasurement.iaselect.MessageCategories.java

License:Open Source License

@Override
public String toString() {
    return "MessageCategories [categories=" + StringUtils.join(categories, ", ") + "]";
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License:Open Source License

/**
 * The returned line consists of the word, lemma, tag, depRelation,
 * distanceToGovernor, GovWord, GovLemma and GovTag.
 *
 * @param index
 * @param token
 * @param m
 * @param head
 * @param deprel
 * @param headWord
 * @return
 */
private static String line(int index, TaggedWord token, Morphology m, int head, String deprel,
        TaggedWord headWord) {
    ArrayList<String> fields = new ArrayList<>(16);

    // fields.add(Integer.toString(index)); // 1
    fields.add(orNull(token.word())); // 2
    fields.add(orNull(m.lemma(token.word(), token.tag(), true))); // 3
    fields.add(orNull(token.tag())); // 4
    // fields.add(orNull(token.ner()));     // 5
    fields.add(deprel);

    if (head == 0) {
        fields.add(Integer.toString(head)); // 6
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);

    } else if (head > 0) {
        fields.add(Integer.toString(head - index)); // 6
        fields.add(headWord.word());
        fields.add(m.lemma(headWord.word(), headWord.tag(), true));
        fields.add(headWord.tag());

    } else {
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
    }

    return StringUtils.join(fields, "\t");
}
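
Because the fields are joined with "\t" as glue, the returned value is a single tab-separated record, for example (values purely illustrative, tabs shown as wide spaces):

dogs    dog    NNS    nsubj    2    bark    bark    VBP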

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Runs and scores coref in a distributed manner; the score is written to a file.
 */
public static void runAndScoreCorefDist(String runDistCmd, Properties props, String propsFile)
        throws Exception {
    PrintWriter pw = IOUtils.getPrintWriter(propsFile);
    props.store(pw, null);
    pw.close();
    /* Run coref job in a distributed manner, score is written to file */
    List<String> cmd = new ArrayList<String>();
    cmd.addAll(Arrays.asList(runDistCmd.split("\\s+")));
    cmd.add("-props");
    cmd.add(propsFile);
    ProcessBuilder pb = new ProcessBuilder(cmd);
    // Copy environment variables over
    Map<String, String> curEnv = System.getenv();
    Map<String, String> pbEnv = pb.environment();
    pbEnv.putAll(curEnv);

    logger.info("Running distributed coref:" + StringUtils.join(pb.command(), " "));
    StringWriter outSos = new StringWriter();
    StringWriter errSos = new StringWriter();
    PrintWriter out = new PrintWriter(new BufferedWriter(outSos));
    PrintWriter err = new PrintWriter(new BufferedWriter(errSos));
    SystemUtils.run(pb, out, err);
    out.close();
    err.close();
    String outStr = outSos.toString();
    String errStr = errSos.toString();
    logger.info("Finished distributed coref: " + runDistCmd + ", props=" + propsFile);
    logger.info("Output: " + outStr);
    if (errStr.length() > 0) {
        logger.info("Error: " + errStr);
    }
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Given a set of sieves, selects an optimal ordering by iterating over the
 * sieves, selecting the one that gives the best score, and adding sieves one
 * at a time until none are left.
 */
public void optimizeSieveOrdering(MentionExtractor mentionExtractor, Properties props, String timestamp)
        throws Exception {
    logger.info("=============SIEVE OPTIMIZATION START ====================");
    logger.info("Optimize sieves using score: " + optimizeScoreType);
    FileFilter scoreFilesFilter = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getAbsolutePath().endsWith(".score");
        }

        public String toString() {
            return ".score";
        }
    };
    Pattern scoreFilePattern = Pattern.compile(".*sieves\\.(\\d+)\\.(\\d+).score");
    String runDistributedCmd = props.getProperty(Constants.RUN_DIST_CMD_PROP);
    String mainWorkDirPath = props.getProperty(Constants.RUN_DIST_CMD_WORK_DIR, "workdir") + "-" + timestamp
            + File.separator;

    double mult = 0.007;
    double dDown[] = { -1, 1, 0, 0 };
    double dUp[] = { 0, 0, -1, 1 };
    double dUpPre[] = { 0, 0, 0, 0 };

    double bestScore = runAndScoreCoref(this, mentionExtractor, props, timestamp);
    for (int it = 0; it < 20; it++) {

        String workDirPath = mainWorkDirPath + it + File.separator;
        File workDir = new File(workDirPath);
        workDir.mkdirs();
        workDirPath = workDir.getAbsolutePath() + File.separator;

        boolean changed = false;
        for (int i = 0; i < dUp.length; i++) {
            double dUpV = dUp[i] * mult;
            double dDownV = dDown[i] * mult;
            double dUpPreV = dUpPre[i] * mult;
            if (dUpV > 1)
                dUpV = 1;
            if (dDownV < 0)
                dDownV = 0;
            if (dUpPreV > 1)
                dUpPreV = 1;

            ((ClassifierSieve) sieves[0]).inconsistencyThreshold += dDownV;
            ((ClassifierSieve) sieves[0]).matchThreshol += dUpPreV;
            ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol += dUpV;

            double score = runAndScoreCoref(this, mentionExtractor, props, timestamp);
            if (score > bestScore) {
                bestScore = score;
                logger.info("new best" + bestScore);
                logger.info("down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
                changed = true;
            } else if (score < bestScore) {
                logger.info("back from down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);

                ((ClassifierSieve) sieves[0]).inconsistencyThreshold -= dDownV;
                ((ClassifierSieve) sieves[0]).matchThreshol -= dUpPreV;
                ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol -= dUpV;
                logger.info("to down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);

            }
        }
        if (!changed)
            mult *= 0.5;
    }
    logger.info("Final Sieve Ordering: " + StringUtils.join(sieveClassNames, ","));
    logger.info("=============SIEVE OPTIMIZATION DONE ====================");
}

From source file:lv.pipe.NerTagger.java

License:Open Source License

private void merge(List<Annotation> flat, List<CoreLabel> result) {
    if (flat.size() != result.size()) {
        System.err.println("Warning: not equal result and annotation set");
    }
    for (int i = 0; i < flat.size(); i++) {
        Annotation a = flat.get(i);
        CoreLabel wi = result.get(i);
        if (a.getText().equals(BOUNDARY))
            continue;

        String answer = wi.get(NamedEntityTagAnnotation.class);
        if (answer == null) {
            answer = wi.get(AnswerAnnotation.class);
            if (answer == null)
                answer = "O";
        }
        if (!"O".equals(answer))
            a.setNer(answer);

        // Add extra gazetteer features used by NER
        String morphoFeats = a.get(LabelMorphoFeatures.class, "").trim();
        if (wi.get(DistSimAnnotation.class) != null)
            morphoFeats += "|Distsim=" + wi.getString(DistSimAnnotation.class);
        if (wi.get(LVGazAnnotation.class) != null && wi.get(LVGazAnnotation.class).size() > 0)
            morphoFeats += "|Gaz=" + StringUtils.join(wi.get(LVGazAnnotation.class), ",");
        if (wi.get(LVGazFileAnnotation.class) != null && wi.get(LVGazFileAnnotation.class).size() > 0)
            morphoFeats += "|GazFile=" + StringUtils.join(wi.get(LVGazFileAnnotation.class), ",");
        a.set(LabelMorphoFeatures.class, morphoFeats);
    }
}

From source file:org.purl.net.wonderland.nlp.CollocationsManager.java

License:Open Source License

static List<WTagging> buildSentenceWithCollocations(List<WTagging> sentence, List<TaggedWord> pennTags) {
    List<WTagging> newSentence = new ArrayList<WTagging>();
    int p = 0;
    for (int s = 0; s < sentence.size(); ++s) {
        WTagging wt = sentence.get(s);
        TaggedWord pt = pennTags.get(p);
        String wWord = wt.word();
        String pWord = pt.word();
        if (wWord.equals(pWord)) {
            WTagging tagging = new WTagging();
            MorphAdornerWrapper.copyAdornedWord(tagging, wt);
            tagging.setPennTag(pt.tag());
            newSentence.add(tagging);
            ++p;
        } else {
            WTagging tagging = new WTagging();
            tagging.setWrittenForm(pWord);

            String[] words = pWord.split("_");
            words[0] = wt.getLemma();
            StringBuilder ma = new StringBuilder(wt.getPartsOfSpeech());
            for (int i = 1; i < words.length; ++i) {
                ++s;
                wt = sentence.get(s);
                ma.append(MorphAdornerWrapper.lemmaSeparator);
                ma.append(wt.getPartsOfSpeech());
                words[i] = wt.getLemma();
            }

            tagging.setLemma(StringUtils.join(words, "_"));
            tagging.setPennTag(pt.tag());
            tagging.setPartsOfSpeech(ma.toString());
            tagging.setCollocation(true);
            newSentence.add(tagging);
            ++p;
        }
    }
    return newSentence;
}

From source file:org.purl.net.wonderland.nlp.resources.WordNetWrapper.java

License:Open Source License

public static void listCollocations() {
    try {
        Map<String, List<String>> all = new Hashtable<String, List<String>>();

        Iterator<IndexWord> it = dict.getIndexWordIterator(POS.ADJECTIVE);
        while (it.hasNext()) {
            IndexWord w = it.next();
            String lemma = w.getLemma();
            if (lemma.indexOf(" ") >= 0) {
                lemma = StringUtils.join(lemma.split(" "), "_");
                if (!all.containsKey(lemma)) {
                    all.put(lemma, new ArrayList<String>());
                }
                all.get(lemma).add("adjective");
            }
        }

        it = dict.getIndexWordIterator(POS.NOUN);
        while (it.hasNext()) {
            IndexWord w = it.next();
            String lemma = w.getLemma();
            if (lemma.indexOf(" ") >= 0) {
                lemma = StringUtils.join(lemma.split(" "), "_");
                if (!all.containsKey(lemma)) {
                    all.put(lemma, new ArrayList<String>());
                }
                all.get(lemma).add("Nn");
            }
        }

        it = dict.getIndexWordIterator(POS.ADVERB);
        while (it.hasNext()) {
            IndexWord w = it.next();
            String lemma = w.getLemma();
            if (lemma.indexOf(" ") >= 0) {
                lemma = StringUtils.join(lemma.split(" "), "_");
                if (!all.containsKey(lemma)) {
                    all.put(lemma, new ArrayList<String>());
                }
                all.get(lemma).add("adverb");
            }
        }

        it = dict.getIndexWordIterator(POS.VERB);
        while (it.hasNext()) {
            IndexWord w = it.next();
            String lemma = w.getLemma();
            if (lemma.indexOf(" ") >= 0) {
                lemma = StringUtils.join(lemma.split(" "), "_");
                if (!all.containsKey(lemma)) {
                    all.put(lemma, new ArrayList<String>());
                }
                all.get(lemma).add("verb");
            }
        }

        for (String lemma : all.keySet()) {
            List<String> types = all.get(lemma);
            System.out.println(lemma + "," + StringUtils.join(types.toArray(new String[] {}), "|"));
        }

    } catch (JWNLException ex) {
        W.handleException(ex);
    }
}