List of usage examples for com.google.common.collect.Multiset#add(E)
@Override
boolean add(E element);
From source file:PostgresClassifier.java
/**
 * Classifies every unlabeled row (rep IS NULL) of a Postgres table with a trained
 * Mahout naive-Bayes model and writes the winning label back into the rep column.
 *
 * Arguments: [model] [label index] [dictionary] [document frequency] [input postgres table]
 *
 * Fixes over the original version:
 * - JDBC resources are managed with try-with-resources (they leaked on any exception).
 * - The UPDATE uses a PreparedStatement instead of concatenating values into SQL.
 * - A failure now exits with status 1 instead of 0, so callers can detect it.
 */
public static void main(String[] args) throws Exception {
    if (args.length < 5) {
        System.out.println(
                "Arguments: [model] [label index] [dictionary] [document frequency] [input postgres table]");
        return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];
    String tablename = args[4];

    Configuration configuration = new Configuration();

    // model is a matrix (wordId, labelId) => probability score
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);
    StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

    // labels is a map classId => label name
    Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
    Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration, new Path(documentFrequencyPath));

    // analyzer used to extract words from the document body
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    int labelCount = labels.size();
    // by convention, key -1 holds the total number of documents in the training set
    int documentCount = documentFrequency.get(-1).intValue();

    System.out.println("Number of labels: " + labelCount);
    System.out.println("Number of documents in training set: " + documentCount);

    try {
        Class.forName("org.postgresql.Driver");
        // NOTE(review): connection URL and credentials are hard-coded; consider externalizing.
        try (Connection c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni",
                "postgres", "dbwpsdkdl")) {
            c.setAutoCommit(false);
            System.out.println("Opened database successfully");

            // The table name cannot be bound as a parameter, so it is still interpolated;
            // it must come from a trusted source. Row values are bound safely below.
            try (Statement stmt = c.createStatement();
                    PreparedStatement update = c
                            .prepareStatement("UPDATE " + tablename + " SET rep = ? WHERE seq = ?");
                    ResultSet rs = stmt.executeQuery("SELECT * FROM " + tablename + " WHERE rep is null")) {
                while (rs.next()) {
                    String id = rs.getString("seq");
                    String message = rs.getString("body");

                    // Count the occurrences of each in-dictionary word of the document.
                    Multiset<String> words = ConcurrentHashMultiset.create();
                    TokenStream ts = analyzer.tokenStream("text", new StringReader(message));
                    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                    ts.reset();
                    int wordCount = 0;
                    while (ts.incrementToken()) {
                        if (termAtt.length() > 0) {
                            String word = termAtt.toString();
                            Integer wordId = dictionary.get(word);
                            // if the word is not in the dictionary, skip it
                            if (wordId != null) {
                                words.add(word);
                                wordCount++;
                            }
                        }
                    }
                    ts.end();
                    ts.close();

                    // Create a sparse vector wordId => weight using tf-idf.
                    Vector vector = new RandomAccessSparseVector(10000);
                    TFIDF tfidf = new TFIDF();
                    for (Multiset.Entry<String> entry : words.entrySet()) {
                        String word = entry.getElement();
                        int count = entry.getCount();
                        Integer wordId = dictionary.get(word);
                        Long freq = documentFrequency.get(wordId);
                        double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
                        vector.setQuick(wordId, tfIdfValue);
                    }

                    // The classifier yields one score per label; the highest score wins.
                    Vector resultVector = classifier.classifyFull(vector);
                    double bestScore = -Double.MAX_VALUE;
                    int bestCategoryId = -1;
                    for (Element element : resultVector.all()) {
                        int categoryId = element.index();
                        double score = element.get();
                        if (score > bestScore) {
                            bestScore = score;
                            bestCategoryId = categoryId;
                        }
                    }

                    update.setString(1, labels.get(bestCategoryId));
                    // seq was compared unquoted in the original SQL, so it is a numeric column.
                    update.setLong(2, Long.parseLong(id));
                    update.executeUpdate();
                }
            }
            c.commit();
        }
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        // Non-zero exit status so scripts can detect the failure (was 0).
        System.exit(1);
    } finally {
        analyzer.close();
    }
}
From source file:org.apache.ctakes.relationextractor.eval.PrintRelationStatistics.java
/**
 * For every gold-standard binary relation in the training corpus, counts the number
 * of entity mentions lying between the two arguments (negative when arg2 precedes
 * arg1 in the text) and prints a histogram of those counts to stderr.
 */
public static void main(String[] args) throws Exception {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    parser.parseArgument(args);

    CollectionReader reader = CollectionReaderFactory.createReader(XReader.class,
            FilesCollectionReader.PARAM_ROOT_FILE, options.trainDirectory.getPath());

    Multiset<Integer> mentionsBetweenCounts = HashMultiset.create();
    JCas jCas = JCasFactory.createJCasFromPath("../ctakes-type-system/desc/common_type_system.xml");
    while (reader.hasNext()) {
        reader.getNext(jCas.getCas());
        JCas goldView = jCas.getView(GOLD_VIEW_NAME);
        for (BinaryTextRelation relation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
            Annotation first = relation.getArg1().getArgument();
            Annotation second = relation.getArg2().getArgument();
            // Count mentions strictly between the two arguments; the sign encodes
            // whether the arguments appear in order (positive) or reversed (negative).
            boolean inOrder = first.getBegin() < second.getBegin();
            Annotation left = inOrder ? first : second;
            Annotation right = inOrder ? second : first;
            int between = JCasUtil
                    .selectCovered(goldView, EntityMention.class, left.getEnd(), right.getBegin()).size();
            mentionsBetweenCounts.add(inOrder ? between : -between);
        }
    }

    // Print the histogram sorted by mention-count key.
    List<Integer> sortedKeys = new ArrayList<Integer>(mentionsBetweenCounts.elementSet());
    Collections.sort(sortedKeys);
    for (Integer key : sortedKeys) {
        System.err.printf("%d x%d\n", key, mentionsBetweenCounts.count(key));
    }
}
From source file:com.music.tools.MidiAnalyzer.java
/**
 * Ad-hoc analysis of a MIDI file (jMusic): prints the parts, then for one part
 * tallies melodic interval frequencies, direction changes vs. retentions, and a
 * rough beat statistic, and finally dumps jMusic's built-in phrase statistics.
 *
 * NOTE(review): the input path and part index are hard-coded — this is a scratch
 * analysis tool, not a library entry point.
 */
public static void main(String[] args) {
    Score score = new Score();
    Read.midi(score, "C:\\workspace\\music\\analysis\\midi\\jarre\\EQUINOX3.MID");
    for (Part part : score.getPartArray()) {
        System.out.println(part.getTitle() + " : " + part.getInstrument());
    }
    Part part = score.getPart(1);
    System.out.println(part.getInstrument());
    part.setTempo(160);
    // previousPitch / prePreviousPitch form a 2-note sliding window over non-rest
    // notes; 0 doubles as the "not yet seen" sentinel.
    int previousPitch = 0;
    int prePreviousPitch = 0;
    System.out.println(score.getTimeSignature());
    Multiset<Integer> uniqueIntervals = HashMultiset.create();
    int directionChanges = 0;
    int directionRetentions = 0;
    LinkedList<Double> noteLengths = new LinkedList<>();
    for (Note note : part.getPhrase(0).getNoteArray()) {
        System.out.println(note.getPitch());
        if (!note.isRest()) {
            if (prePreviousPitch != 0) {
                int previousDiff = previousPitch - prePreviousPitch;
                int diff = note.getPitch() - previousPitch;
                // A direction change requires two genuine (non-zero) moves with
                // opposite signs; equal signs count as a retention.
                if (Math.signum(previousDiff) != Math.signum(diff) && diff != 0 && previousDiff != 0) {
                    directionChanges++;
                    System.out.println(prePreviousPitch + ":" + previousPitch + ":" + note.getPitch());
                } else if (diff != 0 && previousDiff != 0) {
                    directionRetentions++;
                }
            }
            // Only advance the older window slot when the pitch actually moved,
            // so repeated notes do not dilute the direction statistics.
            if (note.getPitch() - previousPitch != 0) {
                prePreviousPitch = previousPitch;
            }
            uniqueIntervals.add(previousPitch - note.getPitch());
            previousPitch = note.getPitch();
        }
        // Rhythm values are collected for rests too, to cover the full timeline.
        noteLengths.add(note.getRhythmValue());
    }
    // Beat size normalized to quarter-note units, e.g. 4/4 -> 4.0, 6/8 -> 3.0.
    double normalizedBeatSize = 1d * score.getNumerator() * 4 / score.getDenominator();
    System.out.println("Beat size: " + normalizedBeatSize);
    double currentBeatSize = 0;
    int beats = 0;
    int beatsWithPerfectHalves = 0;
    // reverse, to avoid off-beats
    for (Iterator<Double> it = noteLengths.descendingIterator(); it.hasNext();) {
        currentBeatSize += it.next();
        ;
        if (currentBeatSize >= normalizedBeatSize) {
            currentBeatSize = 0;
            beats++;
        }
        // NOTE(review): exact == comparison on doubles — only hits when the
        // accumulated rhythm values land precisely on half a beat.
        if (currentBeatSize == normalizedBeatSize / 2) {
            beatsWithPerfectHalves++;
        }
    }
    System.out.println("Beats:beats with perfect halves -- " + beats + ":" + beatsWithPerfectHalves);
    Hashtable<String, Object> table = PhraseAnalysis.getAllStatistics(score.getPart(1).getPhrase(0), 1, 0,
            Scales.MAJOR_SCALE);
    for (Entry<String, Object> entry : table.entrySet()) {
        System.out.println(entry.getKey() + "=" + entry.getValue());
    }
    for (Integer interval : uniqueIntervals.elementSet()) {
        System.out.println(interval + " : " + uniqueIntervals.count(interval));
    }
    System.out.println("---");
    System.out.println(directionChanges + " : " + directionRetentions);
    Play.midi(part);
}
From source file:edu.mit.streamjit.test.StreamFuzzer.java
public static void main(String[] args) throws InterruptedException, IOException { StreamCompiler debugSC = new InterpreterStreamCompiler(); StreamCompiler compilerSC = new Compiler2StreamCompiler(); Set<FuzzElement> completedCases = new HashSet<>(); int generated; int duplicatesSkipped = 0; Multiset<Class<?>> ignored = HashMultiset.create(ignoredExceptions.size()); int failures = 0, successes = 0; next_case: for (generated = 0; true; ++generated) { FuzzElement fuzz = StreamFuzzer.generate(); if (!completedCases.add(fuzz)) { ++duplicatesSkipped;/* w w w .j a v a 2 s. com*/ continue; } try { fuzz.instantiate().visit(new CheckVisitor()); } catch (IllegalStreamGraphException ex) { System.out.println("Fuzzer generated bad test case"); ex.printStackTrace(System.out); fuzz.instantiate().visit(new PrintStreamVisitor(System.out)); } List<Integer> debugOutput = run(fuzz, debugSC); List<Integer> compilerOutput = null; try { compilerOutput = run(fuzz, compilerSC); } catch (Throwable ex) { for (Throwable t : Throwables.getCausalChain(ex)) if (ignoredExceptions.contains(t.getClass())) { ignored.add(t.getClass()); continue next_case; } System.out.println("Compiler failed"); ex.printStackTrace(System.out); //fall into the if below } if (!debugOutput.equals(compilerOutput)) { ++failures; fuzz.instantiate().visit(new PrintStreamVisitor(System.out)); System.out.println(fuzz.toJava()); //TODO: show only elements where they differ System.out.println("Debug output: " + debugOutput); System.out.println("Compiler output: " + compilerOutput); writeRegressionTest(fuzz); break; } else ++successes; System.out.println(fuzz.hashCode() + " matched"); } System.out.format("Generated %d cases%n", generated); System.out.format(" skipped %d duplicates (%f%%)%n", duplicatesSkipped, ((double) duplicatesSkipped) * 100 / generated); for (Class<?> c : ignoredExceptions) { int count = ignored.count(c); if (count > 0) System.out.format(" ignored %d due to %s (%f%%)%n", count, c, ((double) count) * 100 / 
generated); } System.out.format("Ran %d cases (%f%% run rate)%n", successes + failures, ((double) successes + failures) * 100 / generated); System.out.format(" %d succeeded (%f%%)%n", successes, ((double) successes) * 100 / (successes + failures)); System.out.format(" %d failed (%f%%)%n", failures, ((double) failures) * 100 / (successes + failures)); }
From source file:org.trnltk.experiment.morphology.ambiguity.AmbiguityClassifier.java
/**
 * Compares every pair of morphological parse results for each word in the corpus
 * files, collects the pairwise differences in multisets, and prints timing plus
 * frequency statistics for the most common non-root differences.
 */
public static void main(String[] args) throws IOException, JSONException {
    int numberOfWords = 0;
    int numberOfParseResults = 0;
    final Multiset<ParseResultDifference> differenceSet = HashMultiset.create();
    final Multiset<ParseResultDifference> differenceSetWithoutRootDifferences = HashMultiset.create();
    final File folder = new File("D:\\devl\\data\\1MSentences\\split");
    final File[] files = folder.listFiles();
    if (files == null)
        throw new RuntimeException();
    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    for (int fileIndex = 0; fileIndex < files.length; fileIndex++) {
        File file = files[fileIndex];
        System.out.println("Processing file " + file);
        //        final BufferedReader reader = new BufferedReader(new FileReader(file));
        //        while (reader.ready()) {
        //            reader.readLine();
        //        }
        final ParseResultReader parseResultReader = new ParseResultReader();
        final ParseResultDiffTool parseResultDiffTool = new ParseResultDiffTool();
        final List<WordParseResultEntry> parseResultEntries = parseResultReader
                .getParseResultEntries(Files.newReader(file, Charsets.UTF_8));
        numberOfWords += parseResultEntries.size();
        //noinspection ForLoopReplaceableByForEach
        for (int parseResultEntryIndex = 0; parseResultEntryIndex < parseResultEntries
                .size(); parseResultEntryIndex++) {
            WordParseResultEntry parseResultEntry = parseResultEntries.get(parseResultEntryIndex);
            final List<ParseResult> parseResults = parseResultEntry.getParseResults();
            numberOfParseResults += parseResults.size();
            // Diff every unordered pair of parse results for this word.
            for (int i = 0; i < parseResults.size(); i++) {
                final ParseResult leftParseResult = parseResults.get(i);
                for (int j = i + 1; j < parseResults.size(); j++) {
                    final ParseResult rightParseResult = parseResults.get(j);
                    final ParseResultDifference difference = parseResultDiffTool.findDifference(leftParseResult,
                            rightParseResult);
                    // NOTE(review): per Guava docs, Multiset.add(E) ALWAYS returns
                    // true, so 'added' is always true here — it does NOT mean
                    // "first occurrence" as the name suggests. Verify intent; if
                    // Set semantics were intended, the second multiset is
                    // over-counted.
                    final boolean added = differenceSet.add(difference);
                    if (added && difference.hasNoRootDifference() && difference.hasPartDifference())
                        differenceSetWithoutRootDifferences.add(difference);
                }
            }
        }
        //noinspection ConstantConditions
        // Deliberate debug short-circuit: only the first file is processed.
        if (fileIndex == 0)
            break;
    }
    stopWatch.stop();
    final long time = stopWatch.getTime();
    System.out.println(stopWatch);
    // NOTE(review): 51 looks like a historical file-count used to extrapolate
    // per-file time — confirm before relying on this number.
    System.out.println(Long.valueOf(time).doubleValue() / (51));
    System.out.println("Number of words : " + numberOfWords);
    System.out.println("Number of parseResults : " + numberOfParseResults);
    System.out.println("Number of distinct differences : " + differenceSet.elementSet().size());
    System.out.println("numberOfDistinctDifferencesWithoutRootDifference : "
            + differenceSetWithoutRootDifferences.elementSet().size());
    // Order the non-root differences by descending frequency and print the
    // ones occurring more than 100 times.
    final ImmutableMultiset<ParseResultDifference> sortedDifferenceSetWithoutRootDifferences = Multisets
            .copyHighestCountFirst(differenceSetWithoutRootDifferences);
    for (ParseResultDifference parseResultDifference : sortedDifferenceSetWithoutRootDifferences.elementSet()) {
        final int count = sortedDifferenceSetWithoutRootDifferences.count(parseResultDifference);
        if (count > 100) {
            System.out.println(count);
            System.out.println(parseResultDifference);
        }
    }
}
From source file:codex.extract.ZipUtils.java
/**
 * Tallies the file-name suffixes of all non-directory entries in the zip,
 * returning a multiset mapping suffix to occurrence count.
 */
public static Multiset<String> summarizeSources(ZipFile file) {
    Multiset<String> suffixCounts = HashMultiset.create();
    file.stream()
            .filter(entry -> !entry.isDirectory())
            .map(entry -> suffix(entry.getName()))
            .forEach(suffixCounts::add);
    return suffixCounts;
}
From source file:org.sonar.graph.FeedbackCycle.java
/**
 * Collects every edge of every cycle into one bag, so an edge shared by several
 * cycles is counted once per cycle containing it.
 */
private static Multiset<Edge> createBagWithAllEdgesOfCycles(Set<Cycle> cycles) {
    Multiset<Edge> bag = HashMultiset.create();
    for (Cycle currentCycle : cycles) {
        for (Edge currentEdge : currentCycle.getEdges()) {
            bag.add(currentEdge);
        }
    }
    return bag;
}
From source file:org.apache.drill.plan.ParsePlan.java
private static void validate(Plan r) throws ValidationException { int errors = 0; Formatter errorMessages = new Formatter(); // make sure that each output is assigned only once Multiset<Integer> counts = HashMultiset.create(); int line = 1; for (Op op : r.getStatements()) { for (Arg assignment : op.getOutputs()) { int slot = ((Arg.Symbol) assignment).getSlot(); counts.add(slot); if (counts.count(slot) != 1) { errorMessages.format("Output symbol %%%d used more than once in statement %d\n", slot, line); errors++;/*w w w. jav a 2 s .com*/ } } line++; } // make sure that each input is defined at least once line = 1; for (Op op : r.getStatements()) { for (Arg reference : op.getInputs()) { if (reference instanceof Arg.Symbol) { int slot = ((Arg.Symbol) reference).getSlot(); if (counts.count(slot) <= 0) { errorMessages.format("Undefined reference to %%%d in statement %d\n", slot, line); errors++; } } } line++; } if (errors > 0) { throw new ValidationException(errorMessages.toString()); } }
From source file:org.apache.mahout.knn.tools.Vectorize20NewsGroups.java
/**
 * Parses one 20-newsgroups message file into a bag of lowercased words.
 * Header lines (up to the first blank line) contribute words only for headers
 * whose name is in legalHeaders; body lines matching the quoted-text pattern
 * are skipped unless includeQuotes is set.
 */
static Multiset<String> parse(File f) throws IOException {
    return Files.readLines(f, Charsets.UTF_8, new LineProcessor<Multiset<String>>() {
        // True while we are still in the RFC-822-style header section.
        private boolean readingHeaders = true;
        // Splits "Name: value" into at most two parts at the first colon.
        private Splitter header = Splitter.on(":").limit(2);
        // Word splitter: breaks on any char that is not a letter, digit,
        // '.', '/' or ':', dropping empty tokens and trimming the rest.
        private Splitter words = Splitter.on(CharMatcher.forPredicate(new Predicate<Character>() {
            @Override
            public boolean apply(Character ch) {
                return !Character.isLetterOrDigit(ch) && ch != '.' && ch != '/' && ch != ':';
            }
        })).omitEmptyStrings().trimResults();
        // Recognizes common quoted-reply line shapes ("In article …", "> …", "… writes:", "|>").
        private Pattern quotedLine = Pattern.compile("(^In article .*)|(^> .*)|(.*writes:$)|(^\\|>)");
        private Multiset<String> counts = HashMultiset.create();

        @Override
        public boolean processLine(String line) throws IOException {
            // The first blank line ends the header section.
            if (readingHeaders && line.length() == 0) {
                readingHeaders = false;
            }
            if (readingHeaders) {
                Iterator<String> i = header.split(line).iterator();
                String head = i.next().toLowerCase();
                // Only whitelisted headers contribute their value's words.
                if (legalHeaders.contains(head)) {
                    addText(counts, i.next());
                }
            } else {
                boolean quote = quotedLine.matcher(line).matches();
                if (includeQuotes || !quote) {
                    addText(counts, line);
                }
            }
            return true;
        }

        @Override
        public Multiset<String> getResult() {
            return counts;
        }

        // Adds each word of the line to the bag, lowercased.
        private void addText(Multiset<String> v, String line) {
            for (String word : words.split(line)) {
                v.add(word.toLowerCase());
            }
        }
    });
}
From source file:org.caleydo.view.domino.internal.NodeSelections.java
public static Set<Block> getFullBlocks(Set<NodeGroup> selection) { if (selection.isEmpty()) return Collections.emptySet(); Set<Node> nodes = getFullNodes(selection); if (nodes.isEmpty()) return Collections.emptySet(); Multiset<Block> blocks = HashMultiset.create(); for (Node node : nodes) { Block n = node.getBlock();//from w w w . j a v a 2 s.c om blocks.add(n); } for (Iterator<Block> it = blocks.elementSet().iterator(); it.hasNext();) { Block block = it.next(); if (block.nodeCount() != blocks.count(block)) { it.remove();// not all groups } } return blocks.elementSet(); }