List of usage examples for edu.stanford.nlp.util.StringUtils#join, collected from open-source projects
public static String join(String[] items, String glue)
From source file:com.romeikat.datamessie.core.processing.service.fulltext.index.IndexBuilder.java
License:Open Source License
private String getClassNames() { final List<String> classNames = Lists.newLinkedList(); for (final Class<?> classToIndex : classesToIndex) { final String className = classToIndex.getSimpleName(); classNames.add(className);//from w w w . j av a2s . co m } return StringUtils.join(classNames, ", "); }
From source file:com.romeikat.datamessie.core.processing.service.fulltext.query.OutOfQuery.java
License:Open Source License
/**
 * Debug representation: the k parameter followed by the space-separated
 * query terms.
 */
@Override
public String toString() {
    final String joinedTerms = StringUtils.join(queryTerms, " ");
    return "OutOfQuery: " + "k = " + k + ", queryTerms = " + joinedTerms;
}
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.Pipeline.java
License:Open Source License
/**
 * CLI entry point: parses the dataset (-d) and classifier (-c) options and
 * runs cross-validation for that pair; prints usage when either is missing
 * or the command line cannot be parsed.
 *
 * @param args command-line arguments (see the option definitions below)
 * @throws Exception propagated from workspace resolution or the CV run
 */
public static void main(String[] args) throws Exception {
    // Resolve the workspace once; it was previously looked up twice.
    final String workspace = DKProContext.getContext().getWorkspace().getAbsolutePath();
    DATASET_DIR = workspace + "/Datasets/###/ds";
    GOLDSTANDARD_DIR = workspace + "/Datasets/###/gs";

    Options options = new Options();
    options.addOption("d", "dataset", true,
            "dataset to evaluate: " + StringUtils.join(Dataset.values(), ", "));
    options.addOption("c", "classifier", true,
            "classifier to use: " + StringUtils.join(WekaClassifier.values(), ", "));

    // NOTE(review): PosixParser is deprecated in commons-cli >= 1.3; switch
    // to DefaultParser once the dependency version is confirmed to have it.
    CommandLineParser parser = new PosixParser();
    try {
        CommandLine cmd = parser.parse(options, args);
        if (cmd.hasOption("d") && cmd.hasOption("c")) {
            Dataset dataset = Dataset.valueOf(cmd.getOptionValue("d"));
            WekaClassifier wekaClassifier = WekaClassifier.valueOf(cmd.getOptionValue("c"));
            runCV(dataset, wekaClassifier);
        } else {
            new HelpFormatter().printHelp(Pipeline.class.getSimpleName(), options, true);
        }
    } catch (ParseException e) {
        // Surface the actual parse failure instead of silently showing usage.
        System.err.println(e.getMessage());
        new HelpFormatter().printHelp(Pipeline.class.getSimpleName(), options);
    }
}
From source file:eu.eexcess.diversityasurement.iaselect.MessageCategories.java
License:Open Source License
/** Debug representation listing the contained categories. */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder("MessageCategories [categories=");
    sb.append(StringUtils.join(categories, ", "));
    sb.append("]");
    return sb.toString();
}
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
/**
 * Formats one dependency-parsed token as a single tab-separated output line.
 *
 * Field order: word, lemma, POS tag, dependency relation, then the
 * governor-related fields (distance to governor, governor word, governor
 * lemma, governor tag), with NULL_PLACEHOLDER where no governor exists.
 *
 * @param index    position of the token (used to compute distance to governor)
 * @param token    the token to format
 * @param m        morphology used to compute lemmas
 * @param head     governor index; 0 = root, negative = no head information
 * @param deprel   dependency relation label
 * @param headWord the governor token (only read when head > 0)
 * @return the tab-joined line
 */
private static String line(int index, TaggedWord token, Morphology m, int head, String deprel,
        TaggedWord headWord) {
    ArrayList<String> fields = new ArrayList<>(16);
    // fields.add(Integer.toString(index)); // 1 (index column disabled)
    fields.add(orNull(token.word())); // 2
    fields.add(orNull(m.lemma(token.word(), token.tag(), true))); // 3
    fields.add(orNull(token.tag())); // 4
    // fields.add(orNull(token.ner())); // 5 (NER column disabled)
    fields.add(deprel);
    if (head == 0) {
        // Root token: distance 0 and placeholder governor fields.
        fields.add(Integer.toString(head)); // 6
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
    } else if (head > 0) {
        // Regular token: signed distance to its governor plus governor details.
        fields.add(Integer.toString(head - index)); // 6
        fields.add(headWord.word());
        fields.add(m.lemma(headWord.word(), headWord.tag(), true));
        fields.add(headWord.tag());
    } else {
        // No head information at all.
        // NOTE(review): this branch appends FIVE placeholders while the
        // branches above append four fields, so head<0 lines carry one extra
        // column — TODO confirm against the expected output format before
        // changing.
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
        fields.add(NULL_PLACEHOLDER);
    }
    return StringUtils.join(fields, "\t");
}
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/** * Run and score coref distributed//from www . j a v a 2 s.c o m */ public static void runAndScoreCorefDist(String runDistCmd, Properties props, String propsFile) throws Exception { PrintWriter pw = IOUtils.getPrintWriter(propsFile); props.store(pw, null); pw.close(); /* Run coref job in a distributed manner, score is written to file */ List<String> cmd = new ArrayList<String>(); cmd.addAll(Arrays.asList(runDistCmd.split("\\s+"))); cmd.add("-props"); cmd.add(propsFile); ProcessBuilder pb = new ProcessBuilder(cmd); // Copy environment variables over Map<String, String> curEnv = System.getenv(); Map<String, String> pbEnv = pb.environment(); pbEnv.putAll(curEnv); logger.info("Running distributed coref:" + StringUtils.join(pb.command(), " ")); StringWriter outSos = new StringWriter(); StringWriter errSos = new StringWriter(); PrintWriter out = new PrintWriter(new BufferedWriter(outSos)); PrintWriter err = new PrintWriter(new BufferedWriter(errSos)); SystemUtils.run(pb, out, err); out.close(); err.close(); String outStr = outSos.toString(); String errStr = errSos.toString(); logger.info("Finished distributed coref: " + runDistCmd + ", props=" + propsFile); logger.info("Output: " + outStr); if (errStr.length() > 0) { logger.info("Error: " + errStr); } }
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/**
 * Tunes sieve thresholds via a greedy hill-climbing search over 20 sweeps.
 *
 * NOTE(review): the original javadoc described selecting an optimal sieve
 * ORDERING, but the body actually perturbs the first sieve's inconsistency
 * threshold and the first/last sieves' match thresholds in four directions
 * per sweep, keeps a perturbation when the score improves, reverts it when
 * the score drops, and halves the step size when a full sweep yields no
 * improvement.
 *
 * @param mentionExtractor source of documents/mentions for scoring runs
 * @param props            configuration properties
 * @param timestamp        run identifier used in the work directory name
 */
public void optimizeSieveOrdering(MentionExtractor mentionExtractor, Properties props, String timestamp)
        throws Exception {
    logger.info("=============SIEVE OPTIMIZATION START ====================");
    logger.info("Optimize sieves using score: " + optimizeScoreType);
    // NOTE(review): scoreFilesFilter, scoreFilePattern and runDistributedCmd
    // are computed but never used below — presumably leftovers from an
    // earlier distributed scoring implementation; confirm before removing.
    FileFilter scoreFilesFilter = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getAbsolutePath().endsWith(".score");
        }

        public String toString() {
            return ".score";
        }
    };
    Pattern scoreFilePattern = Pattern.compile(".*sieves\\.(\\d+)\\.(\\d+).score");
    String runDistributedCmd = props.getProperty(Constants.RUN_DIST_CMD_PROP);
    String mainWorkDirPath = props.getProperty(Constants.RUN_DIST_CMD_WORK_DIR, "workdir") + "-" + timestamp
            + File.separator;
    // Step size for threshold perturbations; halved whenever a sweep stalls.
    double mult = 0.007;
    // Four search directions: direction i applies steps
    // (dDown[i], dUpPre[i], dUp[i]) * mult to the three thresholds below.
    double dDown[] = { -1, 1, 0, 0 };
    double dUp[] = { 0, 0, -1, 1 };
    double dUpPre[] = { 0, 0, 0, 0 };
    double bestScore = runAndScoreCoref(this, mentionExtractor, props, timestamp);
    for (int it = 0; it < 20; it++) {
        // Per-iteration work directory (created but the recomputed absolute
        // path below is never read afterwards).
        String workDirPath = mainWorkDirPath + it + File.separator;
        File workDir = new File(workDirPath);
        workDir.mkdirs();
        workDirPath = workDir.getAbsolutePath() + File.separator;
        boolean changed = false;
        for (int i = 0; i < dUp.length; i++) {
            double dUpV = dUp[i] * mult;
            double dDownV = dDown[i] * mult;
            double dUpPreV = dUpPre[i] * mult;
            // Clamp the step values.
            // NOTE(review): the clamps look asymmetric — dDownV is floored at
            // 0, which zeroes the i==0 "-1" down-step entirely; confirm
            // whether the intent was a bound on the resulting threshold
            // instead of on the step.
            if (dUpV > 1)
                dUpV = 1;
            if (dDownV < 0)
                dDownV = 0;
            if (dUpPreV > 1)
                dUpPreV = 1;
            // Apply the perturbation to the first and last sieve thresholds.
            ((ClassifierSieve) sieves[0]).inconsistencyThreshold += dDownV;
            ((ClassifierSieve) sieves[0]).matchThreshol += dUpPreV;
            ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol += dUpV;
            double score = runAndScoreCoref(this, mentionExtractor, props, timestamp);
            if (score > bestScore) {
                // Improvement: keep the perturbed thresholds.
                bestScore = score;
                logger.info("new best" + bestScore);
                logger.info("down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
                changed = true;
            } else if (score < bestScore) {
                // Worse: revert the perturbation.
                logger.info("back from down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
                ((ClassifierSieve) sieves[0]).inconsistencyThreshold -= dDownV;
                ((ClassifierSieve) sieves[0]).matchThreshol -= dUpPreV;
                ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol -= dUpV;
                logger.info("to down : " + ((ClassifierSieve) sieves[0]).inconsistencyThreshold + " up:"
                        + ((ClassifierSieve) sieves[sieves.length - 1]).matchThreshol);
            }
            // Ties (score == bestScore) keep the perturbation without setting
            // `changed` — presumably intentional drift along plateaus; confirm.
        }
        if (!changed)
            mult *= 0.5; // no direction helped: refine the step size
    }
    logger.info("Final Sieve Ordering: " + StringUtils.join(sieveClassNames, ","));
    logger.info("=============SIEVE OPTIMIZATION DONE ====================");
}
From source file:lv.pipe.NerTagger.java
License:Open Source License
private void merge(List<Annotation> flat, List<CoreLabel> result) { if (flat.size() != result.size()) { System.err.println("Warning: not equal result and annotation set"); }/* w ww. ja v a 2 s . co m*/ for (int i = 0; i < flat.size(); i++) { Annotation a = flat.get(i); CoreLabel wi = result.get(i); if (a.getText().equals(BOUNDARY)) continue; String answer = wi.get(NamedEntityTagAnnotation.class); if (answer == null) { answer = wi.get(AnswerAnnotation.class); if (answer == null) answer = "O"; } if (answer != "O") a.setNer(answer); // Add extra gazetier features used by NER String morphoFeats = a.get(LabelMorphoFeatures.class, "").trim(); if (wi.get(DistSimAnnotation.class) != null) morphoFeats += "|Distsim=" + wi.getString(DistSimAnnotation.class); if (wi.get(LVGazAnnotation.class) != null && wi.get(LVGazAnnotation.class).size() > 0) morphoFeats += "|Gaz=" + StringUtils.join(wi.get(LVGazAnnotation.class), ","); if (wi.get(LVGazFileAnnotation.class) != null && wi.get(LVGazFileAnnotation.class).size() > 0) morphoFeats += "|GazFile=" + StringUtils.join(wi.get(LVGazFileAnnotation.class), ","); a.set(LabelMorphoFeatures.class, morphoFeats); } }
From source file:org.purl.net.wonderland.nlp.CollocationsManager.java
License:Open Source License
/**
 * Rebuilds the tagged sentence so that multi-word collocations detected by
 * the Penn tagger (tokens whose words are joined with '_') become single
 * WTagging entries.
 *
 * Walks both lists in parallel: {@code s} indexes the original per-word
 * taggings, {@code p} indexes the (possibly shorter) Penn-tagged tokens.
 *
 * @param sentence per-word taggings from the morphological adorner
 * @param pennTags Penn-tagged tokens, where a collocation appears as one
 *                 token with its words joined by underscores
 * @return a new sentence list with collocations merged into single taggings
 */
static List<WTagging> buildSentenceWithCollocations(List<WTagging> sentence, List<TaggedWord> pennTags) {
    List<WTagging> newSentence = new ArrayList<WTagging>();
    int p = 0;
    for (int s = 0; s < sentence.size(); ++s) {
        WTagging wt = sentence.get(s);
        TaggedWord pt = pennTags.get(p);
        String wWord = wt.word();
        String pWord = pt.word();
        if (wWord.equals(pWord)) {
            // 1:1 match — copy the adorned word and attach the Penn tag.
            WTagging tagging = new WTagging();
            MorphAdornerWrapper.copyAdornedWord(tagging, wt);
            tagging.setPennTag(pt.tag());
            newSentence.add(tagging);
            ++p;
        } else {
            // Mismatch: the Penn token is a collocation ("a_b_c") covering
            // the next several sentence words. Consume them, merging their
            // lemmas (underscore-joined) and parts-of-speech strings.
            WTagging tagging = new WTagging();
            tagging.setWrittenForm(pWord);
            String[] words = pWord.split("_");
            words[0] = wt.getLemma();
            StringBuilder ma = new StringBuilder(wt.getPartsOfSpeech());
            for (int i = 1; i < words.length; ++i) {
                ++s; // advance over the words that form the collocation
                wt = sentence.get(s);
                ma.append(MorphAdornerWrapper.lemmaSeparator);
                ma.append(wt.getPartsOfSpeech());
                words[i] = wt.getLemma();
            }
            tagging.setLemma(StringUtils.join(words, "_"));
            tagging.setPennTag(pt.tag());
            tagging.setPartsOfSpeech(ma.toString());
            tagging.setCollocation(true);
            newSentence.add(tagging);
            ++p;
        }
    }
    return newSentence;
}
From source file:org.purl.net.wonderland.nlp.resources.WordNetWrapper.java
License:Open Source License
/**
 * Prints every multi-word WordNet entry (collocation) together with the
 * pipe-separated list of part-of-speech type labels it occurs under.
 *
 * Output format per line: {@code lemma_with_underscores,label|label|...}
 */
public static void listCollocations() {
    try {
        Map<String, List<String>> all = new Hashtable<String, List<String>>();
        // The four POS scans were previously four copies of the same loop;
        // delegate to a single helper per part of speech.
        collectCollocations(POS.ADJECTIVE, "adjective", all);
        collectCollocations(POS.NOUN, "Nn", all);
        collectCollocations(POS.ADVERB, "adverb", all);
        collectCollocations(POS.VERB, "verb", all);
        for (String lemma : all.keySet()) {
            List<String> types = all.get(lemma);
            System.out.println(lemma + "," + StringUtils.join(types.toArray(new String[] {}), "|"));
        }
    } catch (JWNLException ex) {
        W.handleException(ex);
    }
}

/**
 * Scans the dictionary index for the given POS and records every lemma that
 * contains a space (i.e. a collocation), normalized to use underscores,
 * under the given type label.
 *
 * @param pos       part of speech whose index to scan
 * @param typeLabel label appended to the lemma's type list
 * @param all       accumulator mapping normalized lemma -> type labels
 * @throws JWNLException propagated from the dictionary iterator
 */
private static void collectCollocations(POS pos, String typeLabel, Map<String, List<String>> all)
        throws JWNLException {
    Iterator<IndexWord> it = dict.getIndexWordIterator(pos);
    while (it.hasNext()) {
        IndexWord w = it.next();
        String lemma = w.getLemma();
        if (lemma.indexOf(" ") >= 0) {
            lemma = StringUtils.join(lemma.split(" "), "_");
            if (!all.containsKey(lemma)) {
                all.put(lemma, new ArrayList<String>());
            }
            all.get(lemma).add(typeLabel);
        }
    }
}