List of usage examples for edu.stanford.nlp.util.StringUtils#join, collected from open-source projects
public static String join(String[] items, String glue)
From source file:com.romeikat.datamessie.core.processing.service.fulltext.index.IndexBuilder.java
License:Open Source License
private String getClassNames() { final List<String> classNames = Lists.newLinkedList(); for (final Class<?> classToIndex : classesToIndex) { final String className = classToIndex.getSimpleName(); classNames.add(className);//from w w w . j av a2s . co m } return StringUtils.join(classNames, ", "); }
From source file:com.romeikat.datamessie.core.processing.service.fulltext.query.OutOfQuery.java
License:Open Source License
/**
 * Debug representation: the k parameter followed by the space-separated
 * query terms.
 */
@Override
public String toString() {
    final String joinedTerms = StringUtils.join(queryTerms, " ");
    return "OutOfQuery: " + "k = " + k + ", queryTerms = " + joinedTerms;
}
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.Pipeline.java
License:Open Source License
/**
 * CLI entry point: parses the dataset (-d) and classifier (-c) options and
 * runs cross-validation for that pair; prints usage when either is missing
 * or the command line cannot be parsed.
 *
 * @param args command-line arguments (see the option definitions below)
 * @throws Exception propagated from workspace resolution or the CV run
 */
public static void main(String[] args) throws Exception {
    // Resolve the workspace once; it was previously looked up twice.
    final String workspace = DKProContext.getContext().getWorkspace().getAbsolutePath();
    DATASET_DIR = workspace + "/Datasets/###/ds";
    GOLDSTANDARD_DIR = workspace + "/Datasets/###/gs";

    Options options = new Options();
    options.addOption("d", "dataset", true,
            "dataset to evaluate: " + StringUtils.join(Dataset.values(), ", "));
    options.addOption("c", "classifier", true,
            "classifier to use: " + StringUtils.join(WekaClassifier.values(), ", "));

    // NOTE(review): PosixParser is deprecated in commons-cli >= 1.3; switch
    // to DefaultParser once the dependency version is confirmed to have it.
    CommandLineParser parser = new PosixParser();
    try {
        CommandLine cmd = parser.parse(options, args);
        if (cmd.hasOption("d") && cmd.hasOption("c")) {
            Dataset dataset = Dataset.valueOf(cmd.getOptionValue("d"));
            WekaClassifier wekaClassifier = WekaClassifier.valueOf(cmd.getOptionValue("c"));
            runCV(dataset, wekaClassifier);
        } else {
            new HelpFormatter().printHelp(Pipeline.class.getSimpleName(), options, true);
        }
    } catch (ParseException e) {
        // Surface the actual parse failure instead of silently showing usage.
        System.err.println(e.getMessage());
        new HelpFormatter().printHelp(Pipeline.class.getSimpleName(), options);
    }
}
From source file:eu.eexcess.diversityasurement.iaselect.MessageCategories.java
License:Open Source License
/** Debug representation listing the contained categories. */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder("MessageCategories [categories=");
    sb.append(StringUtils.join(categories, ", "));
    sb.append("]");
    return sb.toString();
}
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
/**
 * Formats one dependency-parsed token as a single tab-separated output line.
 *
 * Field order: word, lemma, POS tag, dependency relation, then the
 * governor-related fields (distance to governor, governor word, governor
 * lemma, governor tag), with NULL_PLACEHOLDER where no governor exists.
 *
 * @param index    position of the token (used to compute distance to governor)
 * @param token    the token to format
 * @param m        morphology used to compute lemmas
 * @param head     governor index; 0 = root, negative = no head information
 * @param deprel   dependency relation label
 * @param headWord the governor token (only read when head > 0)
 * @return the tab-joined line
 */
private static String line(int index, TaggedWord token, Morphology m, int head, String deprel,
        TaggedWord headWord) {
    ArrayList<String> fields = new ArrayList<>(16);
    // fields.add(Integer.toString(index)); // 1 (index column disabled)
    fields.add(orNull(token.word())); // 2
    fields.add(orNull(m.lemma(token.word(), token.tag(), true))); // 3
    fields.add(orNull(token.tag())); // 4
    // fields.add(orNull(token.ner())); // 5 (NER column disabled)
    fields.add(deprel);
    if (head == 0) {
        // Root token: distance 0 and placeholder governor fields.
        fields.add(Integer.toString(head)); // 6
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
    } else if (head > 0) {
        // Regular token: signed distance to its governor plus governor details.
        fields.add(Integer.toString(head - index)); // 6
        fields.add(headWord.word());
        fields.add(m.lemma(headWord.word(), headWord.tag(), true));
        fields.add(headWord.tag());
    } else {
        // No head information at all.
        // NOTE(review): this branch appends FIVE placeholders while the
        // branches above append four fields, so head<0 lines carry one extra
        // column — TODO confirm against the expected output format before
        // changing.
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
    }
    return StringUtils.join(fields, "\t");
}
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/** * Run and score coref distributed//from www . j a v a 2 s.c o m */ public static void runAndScoreCorefDist(String runDistCmd, Properties props, String propsFile) throws Exception { PrintWriter pw = IOUtils.getPrintWriter(propsFile); props.store(pw, null); pw.close(); /* Run coref job in a distributed manner, score is written to file */ List<String> cmd = new ArrayList<String>(); cmd.addAll(Arrays.asList(runDistCmd.split("\\s+"))); cmd.add("-props"); cmd.add(propsFile); ProcessBuilder pb = new ProcessBuilder(cmd); // Copy environment variables over Map<String, String> curEnv = System.getenv(); Map<String, String> pbEnv = pb.environment(); pbEnv.putAll(curEnv); logger.info("Running distributed coref:" + StringUtils.join(pb.command(), " ")); StringWriter outSos = new StringWriter(); StringWriter errSos = new StringWriter(); PrintWriter out = new PrintWriter(new BufferedWriter(outSos)); PrintWriter err = new PrintWriter(new BufferedWriter(errSos)); SystemUtils.run(pb, out, err); out.close(); err.close(); String outStr = outSos.toString(); String errStr = errSos.toString(); logger.info("Finished distributed coref: " + runDistCmd + ", props=" + propsFile); logger.info("Output: " + outStr); if (errStr.length() > 0) { logger.info("Error: " + errStr); } }
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/**
 * Tunes sieve thresholds via a greedy hill-climbing search over 20 sweeps.
 *
 * NOTE(review): the original javadoc described selecting an optimal sieve
 * ORDERING, but the body actually perturbs the first sieve's inconsistency
 * threshold and the first/last sieves' match thresholds in four directions
 * per sweep, keeps a perturbation when the score improves, reverts it when
 * the score drops, and halves the step size when a full sweep yields no
 * improvement.
 *
 * @param mentionExtractor source of documents/mentions for scoring runs
 * @param props            configuration properties
 * @param timestamp        run identifier used in the work directory name
 */
public void optimizeSieveOrdering(MentionExtractor mentionExtractor, Properties props, String timestamp)
        throws Exception {
    logger.info("=============SIEVE OPTIMIZATION START ====================");
    logger.info("Optimize sieves using score: " + optimizeScoreType);
    // NOTE(review): scoreFilesFilter, scoreFilePattern and runDistributedCmd
    // are computed but never used below — presumably leftovers from an
    // earlier distributed scoring implementation; confirm before removing.
    FileFilter scoreFilesFilter = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getAbsolutePath().endsWith(".score");
        }

        public String toString() {
            return ".score";
        }
    };
    Pattern scoreFilePattern = Pattern.compile(".*sieves\\.(\\d+)\\.(\\d+).score");
    String runDistributedCmd = props.getProperty(Constants.RUN_DIST_CMD_PROP);
    String mainWorkDirPath = props.getProperty(Constants.RUN_DIST_CMD_WORK_DIR, "workdir") + "-" + timestamp
            + File.separator;
    // Step size for threshold perturbations; halved whenever a sweep stalls.
    double mult = 0.007;
    // Four search directions: direction i applies steps
    // (dDown[i], dUpPre[i], dUp[i]) * mult to the three thresholds below.
    double dDown[] = { -1, 1, 0, 0 };
    double dUp[] = { 0, 0, -1, 1 };
    double dUpPre[] = { 0, 0, 0, 0 };
    double bestScore = runAndScoreCoref(this, mentionExtractor, props, timestamp);
    for (int it = 0; it < 20; it++) {
        // Per-iteration work directory (created but the recomputed absolute
        // path below is never read afterwards).
        String workDirPath = mainWorkDirPath + it + File.separator;
        File workDir = new File(workDirPath);
        workDir.mkdirs();
        workDirPath = workDir.getAbsolutePath() + File.separator;
        boolean changed = false;
        for (int i = 0; i < dUp.length; i++) {
            double dUpV = dUp[i] * mult;
            double dDownV = dDown[i] * mult;
            double dUpPreV = dUpPre[i] * mult;
            // Clamp the step values.
            // NOTE(review): the clamps look asymmetric — dDownV is floored at
            // 0, which zeroes the i==0 "-1" down-step entirely; confirm
            // whether the intent was a bound on the resulting threshold
            // instead of on the step.
            if (dUpV > 1)
                dUpV = 1;
            if (dDownV < 0)
                dDownV = 0;
            if (dUpPreV > 1)
                dUpPreV = 1;
            // Apply the perturbation to the first and last sieve thresholds.
            ((ClassifierSieve) sieves[0]).inconsistencyThreshold += dDownV;
            ((ClassifierSieve) sieves[0]).matchThreshol += dUpPreV;
            ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol += dUpV;
            double score = runAndScoreCoref(this, mentionExtractor, props, timestamp);
            if (score > bestScore) {
                // Improvement: keep the perturbed thresholds.
                bestScore = score;
                logger.info("new best" + bestScore);
                logger.info("down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
                changed = true;
            } else if (score < bestScore) {
                // Worse: revert the perturbation.
                logger.info("back from down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
                ((ClassifierSieve) sieves[0]).inconsistencyThreshold -= dDownV;
                ((ClassifierSieve) sieves[0]).matchThreshol -= dUpPreV;
                ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol -= dUpV;
                logger.info("to down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
            }
            // Ties (score == bestScore) keep the perturbation without setting
            // `changed` — presumably intentional drift along plateaus; confirm.
        }
        if (!changed)
            mult *= 0.5; // no direction helped: refine the step size
    }
    logger.info("Final Sieve Ordering: " + StringUtils.join(sieveClassNames, ","));
    logger.info("=============SIEVE OPTIMIZATION DONE ====================");
}
From source file:lv.pipe.NerTagger.java
License:Open Source License
private void merge(List<Annotation> flat, List<CoreLabel> result) { if (flat.size() != result.size()) { System.err.println("Warning: not equal result and annotation set"); }/* w ww. ja v a 2 s . co m*/ for (int i = 0; i < flat.size(); i++) { Annotation a = flat.get(i); CoreLabel wi = result.get(i); if (a.getText().equals(BOUNDARY)) continue; String answer = wi.get(NamedEntityTagAnnotation.class); if (answer == null) { answer = wi.get(AnswerAnnotation.class); if (answer == null) answer = "O"; } if (answer != "O") a.setNer(answer); // Add extra gazetier features used by NER String morphoFeats = a.get(LabelMorphoFeatures.class, "").trim(); if (wi.get(DistSimAnnotation.class) != null) morphoFeats += "|Distsim=" + wi.getString(DistSimAnnotation.class); if (wi.get(LVGazAnnotation.class) != null && wi.get(LVGazAnnotation.class).size() > 0) morphoFeats += "|Gaz=" + StringUtils.join(wi.get(LVGazAnnotation.class), ","); if (wi.get(LVGazFileAnnotation.class) != null && wi.get(LVGazFileAnnotation.class).size() > 0) morphoFeats += "|GazFile=" + StringUtils.join(wi.get(LVGazFileAnnotation.class), ","); a.set(LabelMorphoFeatures.class, morphoFeats); } }
From source file:org.purl.net.wonderland.nlp.CollocationsManager.java
License:Open Source License
/**
 * Rebuilds the tagged sentence so that multi-word collocations detected by
 * the Penn tagger (tokens whose words are joined with '_') become single
 * WTagging entries.
 *
 * Walks both lists in parallel: {@code s} indexes the original per-word
 * taggings, {@code p} indexes the (possibly shorter) Penn-tagged tokens.
 *
 * @param sentence per-word taggings from the morphological adorner
 * @param pennTags Penn-tagged tokens, where a collocation appears as one
 *                 token with its words joined by underscores
 * @return a new sentence list with collocations merged into single taggings
 */
static List<WTagging> buildSentenceWithCollocations(List<WTagging> sentence, List<TaggedWord> pennTags) {
    List<WTagging> newSentence = new ArrayList<WTagging>();
    int p = 0;
    for (int s = 0; s < sentence.size(); ++s) {
        WTagging wt = sentence.get(s);
        TaggedWord pt = pennTags.get(p);
        String wWord = wt.word();
        String pWord = pt.word();
        if (wWord.equals(pWord)) {
            // 1:1 match — copy the adorned word and attach the Penn tag.
            WTagging tagging = new WTagging();
            MorphAdornerWrapper.copyAdornedWord(tagging, wt);
            tagging.setPennTag(pt.tag());
            newSentence.add(tagging);
            ++p;
        } else {
            // Mismatch: the Penn token is a collocation ("a_b_c") covering
            // the next several sentence words. Consume them, merging their
            // lemmas (underscore-joined) and parts-of-speech strings.
            WTagging tagging = new WTagging();
            tagging.setWrittenForm(pWord);
            String[] words = pWord.split("_");
            words[0] = wt.getLemma();
            StringBuilder ma = new StringBuilder(wt.getPartsOfSpeech());
            for (int i = 1; i < words.length; ++i) {
                ++s; // advance over the words that form the collocation
                wt = sentence.get(s);
                ma.append(MorphAdornerWrapper.lemmaSeparator);
                ma.append(wt.getPartsOfSpeech());
                words[i] = wt.getLemma();
            }
            tagging.setLemma(StringUtils.join(words, "_"));
            tagging.setPennTag(pt.tag());
            tagging.setPartsOfSpeech(ma.toString());
            tagging.setCollocation(true);
            newSentence.add(tagging);
            ++p;
        }
    }
    return newSentence;
}
From source file:org.purl.net.wonderland.nlp.resources.WordNetWrapper.java
License:Open Source License
/**
 * Prints every multi-word WordNet entry (collocation) together with the
 * pipe-separated list of part-of-speech type labels it occurs under.
 *
 * Output format per line: {@code lemma_with_underscores,label|label|...}
 */
public static void listCollocations() {
    try {
        Map<String, List<String>> all = new Hashtable<String, List<String>>();
        // The four POS scans were previously four copies of the same loop;
        // delegate to a single helper per part of speech.
        collectCollocations(POS.ADJECTIVE, "adjective", all);
        collectCollocations(POS.NOUN, "Nn", all);
        collectCollocations(POS.ADVERB, "adverb", all);
        collectCollocations(POS.VERB, "verb", all);
        for (String lemma : all.keySet()) {
            List<String> types = all.get(lemma);
            System.out.println(lemma + "," + StringUtils.join(types.toArray(new String[] {}), "|"));
        }
    } catch (JWNLException ex) {
        W.handleException(ex);
    }
}

/**
 * Scans the dictionary index for the given POS and records every lemma that
 * contains a space (i.e. a collocation), normalized to use underscores,
 * under the given type label.
 *
 * @param pos       part of speech whose index to scan
 * @param typeLabel label appended to the lemma's type list
 * @param all       accumulator mapping normalized lemma -> type labels
 * @throws JWNLException propagated from the dictionary iterator
 */
private static void collectCollocations(POS pos, String typeLabel, Map<String, List<String>> all)
        throws JWNLException {
    Iterator<IndexWord> it = dict.getIndexWordIterator(pos);
    while (it.hasNext()) {
        IndexWord w = it.next();
        String lemma = w.getLemma();
        if (lemma.indexOf(" ") >= 0) {
            lemma = StringUtils.join(lemma.split(" "), "_");
            if (!all.containsKey(lemma)) {
                all.put(lemma, new ArrayList<String>());
            }
            all.get(lemma).add(typeLabel);
        }
    }
}