Usage examples for com.google.common.collect.Multiset.elementSet()
Set<E> elementSet();
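elementSet() returns a view containing each distinct element of the multiset exactly once, so its size is the number of distinct elements rather than the total number of occurrences. Before the project examples below, a minimal sketch (the class name and sample words are illustrative only, not taken from any of the projects):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class ElementSetDemo {
    public static void main(String[] args) {
        Multiset<String> words = HashMultiset.create();
        words.add("apple");
        words.add("apple");
        words.add("banana");

        // elementSet() holds each distinct element exactly once
        System.out.println(words.elementSet().size()); // 2 distinct elements
        System.out.println(words.size());              // 3 total occurrences

        // typical pattern: iterate distinct elements, look up their counts
        for (String word : words.elementSet()) {
            System.out.println(word + " : " + words.count(word));
        }
    }
}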
From source file:org.apache.lucene.benchmark.quality.mc.IntrinsicEvaluator.java
public static void main(String[] args) {
    // http://dumps.wikimedia.org/trwiki/20150121/trwiki-20150121-pages-meta-current.xml.bz2
    String bz2Filename = "/Users/iorixxx/trwiki-20150121-pages-meta-current.xml.bz2";
    try {
        IArticleFilter handler = new DemoArticleFilter();
        WikiXMLParser wxp = new WikiXMLParser(bz2Filename, handler);
        wxp.parse();
    } catch (Exception e) {
        e.printStackTrace();
    }

    prune(collisions, 1);

    List<Multiset<String>> allTheLists = new ArrayList<>(collisions.values());
    Collections.sort(allTheLists, new Comparator<Multiset<String>>() {
        @Override
        public int compare(Multiset<String> a1, Multiset<String> a2) {
            // biggest to smallest
            return a2.elementSet().size() - a1.elementSet().size();
        }
    });

    for (Multiset<String> set : allTheLists)
        if (set.entrySet().size() > 1) {
            System.out.println(set);
        }

    for (Deasciifier deasciifier : deasciifiers) {
        deasciifier.printAccuracy();
    }

    System.out.println("Total number of words : " + globalCounter);
}
From source file:org.apache.ctakes.relationextractor.eval.PrintRelationStatistics.java
public static void main(String[] args) throws Exception {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    parser.parseArgument(args);

    CollectionReader reader = CollectionReaderFactory.createReader(XReader.class,
            FilesCollectionReader.PARAM_ROOT_FILE, options.trainDirectory.getPath());

    Multiset<Integer> mentionsBetweenCounts = HashMultiset.create();
    JCas jCas = JCasFactory.createJCasFromPath("../ctakes-type-system/desc/common_type_system.xml");
    while (reader.hasNext()) {
        reader.getNext(jCas.getCas());
        JCas goldView = jCas.getView(GOLD_VIEW_NAME);
        for (BinaryTextRelation relation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
            Annotation arg1 = relation.getArg1().getArgument();
            Annotation arg2 = relation.getArg2().getArgument();
            int mentionsBetween;
            if (arg1.getBegin() < arg2.getBegin()) {
                mentionsBetween = JCasUtil
                        .selectCovered(goldView, EntityMention.class, arg1.getEnd(), arg2.getBegin()).size();
            } else {
                mentionsBetween = -JCasUtil
                        .selectCovered(goldView, EntityMention.class, arg2.getEnd(), arg1.getBegin()).size();
            }
            mentionsBetweenCounts.add(mentionsBetween);
        }
    }

    List<Integer> mentionsBetweenKeys = new ArrayList<Integer>(mentionsBetweenCounts.elementSet());
    Collections.sort(mentionsBetweenKeys);
    for (Integer mentionsBetween : mentionsBetweenKeys) {
        System.err.printf("%d x%d\n", mentionsBetween, mentionsBetweenCounts.count(mentionsBetween));
    }
}
From source file:org.apache.mahout.classifier.sgd.TrainNewsGroups.java
public static void main(String[] args) throws IOException {
    File base = new File(args[0]);
    Multiset<String> overallCounts = HashMultiset.create();

    int leakType = 0;
    if (args.length > 1) {
        leakType = Integer.parseInt(args[1]);
    }

    Dictionary newsGroups = new Dictionary();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setProbes(2);
    AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20,
            NewsgroupHelper.FEATURES, new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
        if (newsgroup.isDirectory()) {
            newsGroups.intern(newsgroup.getName());
            files.addAll(Arrays.asList(newsgroup.listFiles()));
        }
    }
    Collections.shuffle(files);
    System.out.println(files.size() + " training files");
    SGDInfo info = new SGDInfo();

    int k = 0;
    for (File file : files) {
        String ng = file.getParentFile().getName();
        int actual = newsGroups.intern(ng);

        Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
        learningAlgorithm.train(actual, v);

        k++;
        State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();

        SGDHelper.analyzeState(info, leakType, k, best);
    }
    learningAlgorithm.close();
    SGDHelper.dissect(leakType, newsGroups, learningAlgorithm, files, overallCounts);
    System.out.println("exiting main");

    ModelSerializer.writeBinary("/tmp/news-group.model",
            learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));

    List<Integer> counts = Lists.newArrayList();
    System.out.println("Word counts");
    for (String count : overallCounts.elementSet()) {
        counts.add(overallCounts.count(count));
    }
    Collections.sort(counts, Ordering.natural().reverse());
    k = 0;
    for (Integer count : counts) {
        System.out.println(k + "\t" + count);
        k++;
        if (k > 1000) {
            break;
        }
    }
}
From source file:com.memonews.mahout.sentiment.SentimentModelTrainer.java
public static void main(final String[] args) throws IOException {
    final File base = new File(args[0]);
    final String modelPath = args.length > 1 ? args[1] : "target/model";

    final Multiset<String> overallCounts = HashMultiset.create();

    final Dictionary newsGroups = new Dictionary();

    final SentimentModelHelper helper = new SentimentModelHelper();
    helper.getEncoder().setProbes(2);
    final AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(2,
            SentimentModelHelper.FEATURES, new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    final List<File> files = Lists.newArrayList();
    for (final File newsgroup : base.listFiles()) {
        if (newsgroup.isDirectory()) {
            newsGroups.intern(newsgroup.getName());
            files.addAll(Arrays.asList(newsgroup.listFiles()));
        }
    }
    Collections.shuffle(files);
    System.out.printf("%d training files\n", files.size());
    final SGDInfo info = new SGDInfo();

    int k = 0;
    for (final File file : files) {
        final String ng = file.getParentFile().getName();
        final int actual = newsGroups.intern(ng);

        final Vector v = helper.encodeFeatureVector(file, overallCounts);
        learningAlgorithm.train(actual, v);

        k++;
        final State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();

        SGDHelper.analyzeState(info, 0, k, best);
    }
    learningAlgorithm.close();
    SGDHelper.dissect(0, newsGroups, learningAlgorithm, files, overallCounts);
    System.out.println("exiting main");

    ModelSerializer.writeBinary(modelPath,
            learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));

    final List<Integer> counts = Lists.newArrayList();
    System.out.printf("Word counts\n");
    for (final String count : overallCounts.elementSet()) {
        counts.add(overallCounts.count(count));
    }
    Collections.sort(counts, Ordering.natural().reverse());
    k = 0;
    for (final Integer count : counts) {
        System.out.printf("%d\t%d\n", k, count);
        k++;
        if (k > 1000) {
            break;
        }
    }
}
From source file:com.music.tools.MidiAnalyzer.java
public static void main(String[] args) {
    Score score = new Score();
    Read.midi(score, "C:\\workspace\\music\\analysis\\midi\\jarre\\EQUINOX3.MID");
    for (Part part : score.getPartArray()) {
        System.out.println(part.getTitle() + " : " + part.getInstrument());
    }
    Part part = score.getPart(1);
    System.out.println(part.getInstrument());
    part.setTempo(160);
    int previousPitch = 0;
    int prePreviousPitch = 0;
    System.out.println(score.getTimeSignature());
    Multiset<Integer> uniqueIntervals = HashMultiset.create();
    int directionChanges = 0;
    int directionRetentions = 0;
    LinkedList<Double> noteLengths = new LinkedList<>();
    for (Note note : part.getPhrase(0).getNoteArray()) {
        System.out.println(note.getPitch());
        if (!note.isRest()) {
            if (prePreviousPitch != 0) {
                int previousDiff = previousPitch - prePreviousPitch;
                int diff = note.getPitch() - previousPitch;
                if (Math.signum(previousDiff) != Math.signum(diff) && diff != 0 && previousDiff != 0) {
                    directionChanges++;
                    System.out.println(prePreviousPitch + ":" + previousPitch + ":" + note.getPitch());
                } else if (diff != 0 && previousDiff != 0) {
                    directionRetentions++;
                }
            }
            if (note.getPitch() - previousPitch != 0) {
                prePreviousPitch = previousPitch;
            }
            uniqueIntervals.add(previousPitch - note.getPitch());
            previousPitch = note.getPitch();
        }
        noteLengths.add(note.getRhythmValue());
    }

    double normalizedBeatSize = 1d * score.getNumerator() * 4 / score.getDenominator();
    System.out.println("Beat size: " + normalizedBeatSize);
    double currentBeatSize = 0;
    int beats = 0;
    int beatsWithPerfectHalves = 0;
    // reverse, to avoid off-beats
    for (Iterator<Double> it = noteLengths.descendingIterator(); it.hasNext();) {
        currentBeatSize += it.next();
        if (currentBeatSize >= normalizedBeatSize) {
            currentBeatSize = 0;
            beats++;
        }
        if (currentBeatSize == normalizedBeatSize / 2) {
            beatsWithPerfectHalves++;
        }
    }

    System.out.println("Beats:beats with perfect halves -- " + beats + ":" + beatsWithPerfectHalves);

    Hashtable<String, Object> table = PhraseAnalysis.getAllStatistics(score.getPart(1).getPhrase(0), 1, 0,
            Scales.MAJOR_SCALE);
    for (Entry<String, Object> entry : table.entrySet()) {
        System.out.println(entry.getKey() + "=" + entry.getValue());
    }

    for (Integer interval : uniqueIntervals.elementSet()) {
        System.out.println(interval + " : " + uniqueIntervals.count(interval));
    }
    System.out.println("---");
    System.out.println(directionChanges + " : " + directionRetentions);
    Play.midi(part);
}
From source file:org.trnltk.experiment.morphology.ambiguity.AmbiguityClassifier.java
public static void main(String[] args) throws IOException, JSONException {
    int numberOfWords = 0;
    int numberOfParseResults = 0;
    final Multiset<ParseResultDifference> differenceSet = HashMultiset.create();
    final Multiset<ParseResultDifference> differenceSetWithoutRootDifferences = HashMultiset.create();

    final File folder = new File("D:\\devl\\data\\1MSentences\\split");

    final File[] files = folder.listFiles();
    if (files == null)
        throw new RuntimeException();

    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    for (int fileIndex = 0; fileIndex < files.length; fileIndex++) {
        File file = files[fileIndex];
        System.out.println("Processing file " + file);
        // final BufferedReader reader = new BufferedReader(new FileReader(file));
        // while (reader.ready()) {
        //     reader.readLine();
        // }

        final ParseResultReader parseResultReader = new ParseResultReader();
        final ParseResultDiffTool parseResultDiffTool = new ParseResultDiffTool();

        final List<WordParseResultEntry> parseResultEntries = parseResultReader
                .getParseResultEntries(Files.newReader(file, Charsets.UTF_8));
        numberOfWords += parseResultEntries.size();
        //noinspection ForLoopReplaceableByForEach
        for (int parseResultEntryIndex = 0; parseResultEntryIndex < parseResultEntries
                .size(); parseResultEntryIndex++) {
            WordParseResultEntry parseResultEntry = parseResultEntries.get(parseResultEntryIndex);
            final List<ParseResult> parseResults = parseResultEntry.getParseResults();
            numberOfParseResults += parseResults.size();
            for (int i = 0; i < parseResults.size(); i++) {
                final ParseResult leftParseResult = parseResults.get(i);
                for (int j = i + 1; j < parseResults.size(); j++) {
                    final ParseResult rightParseResult = parseResults.get(j);

                    final ParseResultDifference difference = parseResultDiffTool
                            .findDifference(leftParseResult, rightParseResult);
                    final boolean added = differenceSet.add(difference);
                    if (added && difference.hasNoRootDifference() && difference.hasPartDifference())
                        differenceSetWithoutRootDifferences.add(difference);
                }
            }
        }

        //noinspection ConstantConditions
        if (fileIndex == 0)
            break;
    }

    stopWatch.stop();

    final long time = stopWatch.getTime();
    System.out.println(stopWatch);
    System.out.println(Long.valueOf(time).doubleValue() / (51));

    System.out.println("Number of words : " + numberOfWords);
    System.out.println("Number of parseResults : " + numberOfParseResults);
    System.out.println("Number of distinct differences : " + differenceSet.elementSet().size());
    System.out.println("numberOfDistinctDifferencesWithoutRootDifference : "
            + differenceSetWithoutRootDifferences.elementSet().size());

    final ImmutableMultiset<ParseResultDifference> sortedDifferenceSetWithoutRootDifferences = Multisets
            .copyHighestCountFirst(differenceSetWithoutRootDifferences);
    for (ParseResultDifference parseResultDifference : sortedDifferenceSetWithoutRootDifferences.elementSet()) {
        final int count = sortedDifferenceSetWithoutRootDifferences.count(parseResultDifference);
        if (count > 100) {
            System.out.println(count);
            System.out.println(parseResultDifference);
        }
    }
}
From source file:com.tinspx.util.collect.NotNull.java
/**
 * Wraps {@code multiset} as a {@code Multiset} that does not allow
 * {@code null} elements to be added.
 *
 * @throws NullPointerException if any existing element in {@code multiset}
 *         is {@code null}
 */
public static <E> Multiset<E> multiset(Multiset<E> multiset) {
    CollectUtils.checkAllNotNull(multiset.elementSet());
    return Predicated.multiset(multiset, Predicates.notNull());
}
From source file:org.sonar.api.measures.MultisetDistributionFormat.java
static String format(Multiset countBag) {
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (Object obj : countBag.elementSet()) {
        if (!first) {
            sb.append(KeyValueFormat.PAIR_SEPARATOR);
        }
        sb.append(obj.toString());
        sb.append(KeyValueFormat.FIELD_SEPARATOR);
        // -1 allows to include zero values
        sb.append(countBag.count(obj) - 1);
        first = false;
    }
    return sb.toString();
}
From source file:edu.berkeley.compbio.phyloutils.EnvironmentParser.java
public static Collection<RootedPhylogeny<String>> read(InputStream is, RootedPhylogeny<String> tree)
        throws IOException, TreeException, NoSuchNodeException {
    BufferedReader r = new BufferedReader(new InputStreamReader(is));

    Map<String, Multiset<String>> environmentCounts = new HashMap<String, Multiset<String>>();

    String line;
    while ((line = r.readLine()) != null) {
        String[] tokens = line.split(" ");
        Multiset<String> env = environmentCounts.get(tokens[1]);
        if (env == null) {
            env = HashMultiset.create();
            environmentCounts.put(tokens[1], env);
        }
        env.add(tokens[0], Integer.parseInt(tokens[2]));
    }

    Set<RootedPhylogeny<String>> result = new HashSet<RootedPhylogeny<String>>();
    for (Map.Entry<String, Multiset<String>> entry : environmentCounts.entrySet()) {
        String name = entry.getKey();
        Multiset<String> ids = entry.getValue();
        RootedPhylogeny<String> subtree = tree.extractTreeWithLeafIDs(ids.elementSet(), false, false,
                AbstractRootedPhylogeny.MutualExclusionResolutionMode.EXCEPTION);
        subtree.setPayload(name);
        subtree.setLeafWeights(ids);
        result.add(subtree);
    }
    return result;
}
From source file:org.javafunk.funk.Multisets.java
public static <T> Multiset<T> union(Iterable<? extends Iterable<? extends T>> iterables) {
    Multiset<T> unionMultiset = multisetFrom(first(iterables).get());
    for (Iterable<? extends T> iterable : rest(iterables)) {
        Multiset<T> currentMultiset = multisetFrom(iterable);
        for (T element : currentMultiset.elementSet()) {
            int numberInUnionMultiset = unionMultiset.count(element);
            int numberInCurrentMultiset = currentMultiset.count(element);
            if (numberInUnionMultiset < numberInCurrentMultiset) {
                unionMultiset.setCount(element, numberInCurrentMultiset);
            }
        }
    }
    return unionMultiset;
}