Usage examples for com.google.common.collect.Multiset.entrySet()
Set<Entry<E>> entrySet();
From source file:com.cloudera.oryx.rdf.common.rule.NumericDecision.java
static List<Decision> numericDecisionsFromExamples(int featureNumber, Iterable<Example> examples, int suggestedMaxSplitCandidates) { Multiset<Float> sortedFeatureValueCounts = TreeMultiset.create(); StorelessUnivariateStatistic mean = new Mean(); int numExamples = 0; for (Example example : examples) { NumericFeature feature = (NumericFeature) example.getFeature(featureNumber); if (feature == null) { continue; }//from ww w . j a v a 2s . c o m numExamples++; float value = feature.getValue(); sortedFeatureValueCounts.add(value, 1); mean.increment(value); } // Make decisions from split points that divide up input into roughly equal amounts of examples List<Decision> decisions = Lists.newArrayListWithExpectedSize(suggestedMaxSplitCandidates); int approxExamplesPerSplit = FastMath.max(1, numExamples / suggestedMaxSplitCandidates); int examplesInSplit = 0; float lastValue = Float.NaN; // This will iterate in order of value by nature of TreeMap for (Multiset.Entry<Float> entry : sortedFeatureValueCounts.entrySet()) { float value = entry.getElement(); if (examplesInSplit >= approxExamplesPerSplit) { decisions.add( new NumericDecision(featureNumber, (value + lastValue) / 2.0f, (float) mean.getResult())); examplesInSplit = 0; } examplesInSplit += entry.getCount(); lastValue = value; } // The vital condition here is that if decision n decides an example is positive, then all subsequent // decisions in the list will also find it positive. So we need to order from highest threshold to lowest Collections.reverse(decisions); return decisions; }
From source file:com.textocat.textokit.commons.wfstore.DefaultWordformStoreBuilder.java
@Override public DefaultWordformStore<TagType> build() { DefaultWordformStore<TagType> result = new DefaultWordformStore<TagType>(); result.strKeyMap = Maps.newHashMapWithExpectedSize(strKeyMap.size()); for (String wf : strKeyMap.keySet()) { Multiset<TagType> tagBag = strKeyMap.get(wf); int max = 0; TagType maxTag = null;// w ww .j a va2s.co m for (Multiset.Entry<TagType> tagEntry : tagBag.entrySet()) { if (tagEntry.getCount() > max) { max = tagEntry.getCount(); maxTag = tagEntry.getElement(); } } if (maxTag == null) { throw new IllegalStateException(); } result.strKeyMap.put(wf, maxTag); } return result; }
From source file:org.lightjason.agentspeak.action.buildin.collection.list.CSymmetricDifference.java
@Override public final IFuzzyValue<Boolean> execute(final IContext p_context, final boolean p_parallel, final List<ITerm> p_argument, final List<ITerm> p_return, final List<ITerm> p_annotation) { // create a multiset and counts the occurence of element -> on an odd number the element will be returned final Multiset<?> l_count = ConcurrentHashMultiset.create(); CCommon.flatcollection(p_argument).parallel().forEach(i -> l_count.add(i.raw())); final List<?> l_result = l_count.entrySet().parallelStream().filter(i -> i.getCount() % 2 == 1) .collect(Collectors.toList()); p_return.add(CRawTerm.from(p_parallel ? Collections.synchronizedList(l_result) : l_result)); return CFuzzyValue.from(true); }
From source file:org.lightjason.agentspeak.action.builtin.collection.list.CSymmetricDifference.java
@Nonnull @Override//from ww w. ja v a 2 s . co m public final IFuzzyValue<Boolean> execute(final boolean p_parallel, @Nonnull final IContext p_context, @Nonnull final List<ITerm> p_argument, @Nonnull final List<ITerm> p_return) { // create a multiset and counts the occurence of element -> on an odd number the element will be returned final Multiset<Object> l_count = ConcurrentHashMultiset.create(); CCommon.flatten(p_argument).parallel().map(ITerm::raw).forEach(l_count::add); final List<Object> l_result = l_count.entrySet().parallelStream().filter(i -> i.getCount() % 2 == 1) .map(Multiset.Entry::getElement).collect(Collectors.toList()); l_result.sort(Comparator.comparing(Object::hashCode)); p_return.add(CRawTerm.from(p_parallel ? Collections.synchronizedList(l_result) : l_result)); return CFuzzyValue.from(true); }
From source file:org.mule.runtime.extension.api.model.AbstractComplexModel.java
/** * Returns an immutable copy of the {@code values} collection, validating * that no items exist such that its name is repeated * * @param values the collection to copy * @param identifier human friendly identifier of the {@code values} content * @param <T> the generic type of the {@code values} items * @return an immutable copy of the {@code values} */// w w w.jav a 2 s.c o m protected <T extends NamedObject> List<T> unique(Collection<T> values, String identifier) { Multiset<String> names = HashMultiset.create(); values.stream().map(NamedObject::getName).forEach(names::add); List<String> invalid = names.entrySet().stream().filter(entry -> entry.getCount() > 1) .map(Multiset.Entry::getElement).collect(Collectors.toList()); if (!invalid.isEmpty()) { throw new IllegalModelDefinitionException( format("%s %s were defined multiple times", identifier, invalid)); } return copyOf(values); }
From source file:edu.uw.cs.lil.tiny.parser.ccg.factoredlex.features.scorers.LexemeCooccurrenceScorer.java
/**
 * Scores how strongly the given tokens co-occur with the given logical constants.
 *
 * <p>For every constant that is not a coordination predicate, {@code indexScore(word, name)}
 * is summed over all tokens, weighted by the constant's count in the multiset. The sum is
 * divided by {@code tokens.size() * (numConstants + 1)}, where {@code numConstants} is the
 * total count of the non-coordination constants.
 *
 * @param tokens the words to score
 * @param constants the logical constants, with multiplicities
 * @return the normalized score, or {@code 0.0} when {@code tokens} is empty
 */
private double score(List<String> tokens, Multiset<LogicalConstant> constants) {
    if (tokens.isEmpty()) {
        // Without tokens the denominator would be 0 and the result 0.0 / 0 = NaN
        return 0.0;
    }
    double totalScore = 0.0;
    int numConstants = 0;
    for (final Entry<LogicalConstant> entry : constants.entrySet()) {
        final LogicalConstant constant = entry.getElement();
        if (!LogicLanguageServices.isCoordinationPredicate(constant)) {
            final int count = entry.getCount();
            numConstants += count;
            for (final String word : tokens) {
                totalScore += indexScore(word, constant.getName()) * count;
            }
        }
    }
    return totalScore / (tokens.size() * (numConstants + 1));
}
From source file:com.b2international.snowowl.snomed.reasoner.server.diff.SourceConceptNamespaceAndModuleAssigner.java
/**
 * Reserves relationship identifiers per namespace and records the module of each source concept.
 *
 * <p>The counts of {@code conceptIds} are first aggregated by the namespace of each concept ID.
 * For every namespace that many relationship IDs are reserved through the identifier service;
 * the reserved IDs are added to {@code this.reservedIds} and an iterator over them is stored in
 * {@code namespaceToRelationshipIdMap}. Finally each distinct concept ID is looked up in the
 * editing context and its module is stored in {@code conceptIdToRelationshipModuleMap}.
 *
 * @param conceptIds source concept IDs; an ID's count is the number of relationship IDs needed for it
 * @param editingContext context used to look up the concepts
 */
@Override
public void allocateRelationshipIdsAndModules(Multiset<String> conceptIds,
        final SnomedEditingContext editingContext) {
    // Step 1: how many identifiers does each namespace need?
    Multiset<String> demandByNamespace = HashMultiset.create();
    for (Multiset.Entry<String> conceptEntry : conceptIds.entrySet()) {
        int needed = conceptEntry.getCount();
        String conceptNamespace = SnomedIdentifiers.getNamespace(conceptEntry.getElement());
        demandByNamespace.add(conceptNamespace, needed);
    }

    // Step 2: reserve them, one request per namespace
    ISnomedIdentifierService identifierService = getServiceForClass(ISnomedIdentifierService.class);
    for (Multiset.Entry<String> demand : demandByNamespace.entrySet()) {
        String namespace = demand.getElement();
        Collection<String> freshIds = identifierService.reserve(namespace,
                ComponentCategory.RELATIONSHIP, demand.getCount());
        this.reservedIds.addAll(freshIds);
        namespaceToRelationshipIdMap.put(namespace, freshIds.iterator());
    }

    // Step 3: remember the module of every distinct source concept
    for (String conceptId : conceptIds.elementSet()) {
        Concept sourceConcept = editingContext.lookup(conceptId, Concept.class);
        conceptIdToRelationshipModuleMap.put(conceptId, sourceConcept.getModule());
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java
/**
 * Trains the configured Weka classifier on the given examples and persists the results.
 *
 * <p>Steps, in order: build the dataset schema (one attribute per feature name plus the
 * {@code __label__} class attribute), convert the examples to instances (optionally weighted
 * by {@code maxLabelCount / labelCount} when {@code balanceWeight} is set), build the
 * classifier, write the model and schema files, optionally export the training set through an
 * {@code ArffSaver}, and optionally run a 10-fold cross validation whose summary is logged at
 * debug level.
 *
 * @param X feature maps, one per training example
 * @param Y labels, parallel to {@code X}
 * @param crossValidation whether to run and log a 10-fold cross validation after training
 * @throws AnalysisEngineProcessException if balancing is requested without any label, or if
 *         training, serialization, export or evaluation fails
 */
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> schemaAttributes = new ArrayList<>();
    ClassifierProvider.featureNames(X).stream()
            .map(featureName -> new Attribute(featureName))
            .forEachOrdered(attribute -> schemaAttributes.add(attribute));
    Attribute classAttribute = new Attribute("__label__", ClassifierProvider.labelNames(Y));
    schemaAttributes.add(classAttribute);
    String relationName = Files.getNameWithoutExtension(modelFile.getName());
    int exampleCount = X.size();
    datasetSchema = new Instances(relationName, schemaAttributes, exampleCount);
    datasetSchema.setClass(classAttribute);

    // add instances
    Instances trainingInstances = new Instances(datasetSchema, exampleCount);
    if (!balanceWeight) {
        for (int i = 0; i < exampleCount; i++) {
            trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
        }
    } else {
        // weight each example so that every label ends up with the same total weight
        Multiset<String> labelCounts = HashMultiset.create(Y);
        double maxCount = labelCounts.entrySet().stream()
                .mapToInt(labelEntry -> labelEntry.getCount())
                .max()
                .orElseThrow(AnalysisEngineProcessException::new);
        for (int i = 0; i < exampleCount; i++) {
            String exampleLabel = Y.get(i);
            double exampleWeight = maxCount / labelCounts.count(exampleLabel);
            trainingInstances.add(newInstance(X.get(i), exampleLabel, exampleWeight, trainingInstances));
        }
    }

    // training
    try {
        classifier = AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // write model and dataset schema
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // backup training dataset as arff file
    if (datasetExportFile != null) {
        try {
            ArffSaver arffSaver = new ArffSaver();
            arffSaver.setInstances(trainingInstances);
            arffSaver.setFile(datasetExportFile);
            arffSaver.writeBatch();
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    // optional 10-fold cross validation, reported through the debug log only
    if (crossValidation) {
        try {
            Evaluation evaluation = new Evaluation(trainingInstances);
            Random random = new Random();
            evaluation.crossValidateModel(classifier, trainingInstances, 10, random);
            LOG.debug(evaluation.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:bio.gcat.operation.analysis.BDAUsage.java
/**
 * Reports how often each class of the current BDA class table is hit by the given codons.
 *
 * <p>Returns an explanatory result instead of counts when there are no tuples, when
 * {@code Tuple.tuplesLength(tuples)} is not 3, or when no class table is available. Otherwise
 * each tuple is mapped to its class through the table's codon-to-class map and the result text
 * contains one {@code "<class>: <count><br/>"} fragment per distinct class.
 *
 * @param tuples the tuples to analyse
 * @param values additional values (not read by this analysis)
 * @return a {@code SimpleResult} carrying either the usage counts or an explanatory message
 */
@Override
public Result analyse(Collection<Tuple> tuples, Object... values) {
    if (condenseTuples(tuples).isEmpty()) {
        return new SimpleResult(this, "No tuples.");
    }
    if (Tuple.tuplesLength(tuples) != 3) {
        return new SimpleResult(this, "Only sequences with codons (tuples of length 3) are allowed.");
    }

    bio.gcat.geneticcode.dich.ct.ClassTable classTable = AnalysisTool.getBDATool().getClassTable();
    if (classTable == null) {
        return new SimpleResult(this, "No Binary Dichotomic Algorithm. Use BDA Tool to open / create BDAs.");
    }

    HashMap<Codon, List<Object>> codonToClass = classTable.codon2class();
    // Look up the class of every tuple, then count the occurrences of each class
    Multiset<List<Object>> classCount = HashMultiset.create(tuples.stream()
            .map(tuple -> codonToClass.get(new Codon(tuple.toString())))
            .collect(Collectors.toList()));

    StringBuilder report = new StringBuilder();
    for (Entry<List<Object>> usage : classCount.entrySet()) {
        report.append(usage.getElement().mkString("", "", ""))
                .append(": ")
                .append(usage.getCount())
                .append("<br/>");
    }
    return new SimpleResult(this, report.toString());
}
From source file:edu.cmu.lti.oaqa.baseqa.answer.score.scorers.ConceptTypeAnswerScorer.java
@Override public Map<String, Double> score(JCas jcas, Answer answer) { Set<CandidateAnswerOccurrence> caos = TypeUtil.getCandidateAnswerVariants(answer).stream() .map(TypeUtil::getCandidateAnswerOccurrences).flatMap(Collection::stream).collect(toSet()); Multiset<String> ctypes = HashMultiset.create(); caos.stream().map(TypeUtil::annotationOffset).map(offset2ctypes::get).forEach(ctypes::addAll); ImmutableMap.Builder<String, Double> feat2value = ImmutableMap.builder(); for (Multiset.Entry<String> entry : ctypes.entrySet()) { String type = "ctype-" + entry.getElement(); feat2value.put(type + "/ratio", Scorer.safeDividedBy(entry.getCount(), ctypes.size())); feat2value.put(type + "/binary", 1.0); }//from w w w .j a va 2 s . co m return feat2value.build(); }