Example usage for com.google.common.collect Multiset add

List of usage examples for com.google.common.collect Multiset add

Introduction

In this page you can find the example usage for com.google.common.collect Multiset add.

Prototype

@Override
boolean add(E element);

Source Link

Document

Adds a single occurrence of the specified element to this multiset.

Usage

From source file:PostgresClassifier.java

/**
 * Classifies the unlabeled rows ("rep" is null) of a PostgreSQL table with a
 * pre-trained Mahout Naive Bayes model and writes the best-scoring label back
 * into each row's "rep" column.
 *
 * Arguments: [model] [label index] [dictionnary] [document frequency] [input postgres table]
 *
 * @throws Exception if the model files cannot be read or a database/analysis
 *         step fails (errors now propagate instead of being swallowed with
 *         exit code 0 as in the original)
 */
public static void main(String[] args) throws Exception {
    if (args.length < 5) {
        System.out.println(
                "Arguments: [model] [label index] [dictionnary] [document frequency] [input postgres table]");
        return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];
    String tablename = args[4];

    Configuration configuration = new Configuration();

    // model is a matrix (wordId, labelId) => probability score
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);
    StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

    // labels is a map classId => label name
    Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    // dictionary is a map word => wordId
    Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
    // documentFrequency is a map wordId => number of training documents containing the word
    Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration,
            new Path(documentFrequencyPath));

    int labelCount = labels.size();
    // The special key -1 holds the total number of documents in the training set.
    int documentCount = documentFrequency.get(-1).intValue();

    System.out.println("Number of labels: " + labelCount);
    System.out.println("Number of documents in training set: " + documentCount);

    Class.forName("org.postgresql.Driver");
    // NOTE(review): a table name cannot be bound as a JDBC parameter, so it is
    // still concatenated; callers must pass a trusted identifier.
    String selectSql = "SELECT * FROM " + tablename + " WHERE rep is null";
    // The label value is bound as a parameter so labels containing quotes can
    // no longer break (or inject into) the UPDATE statement.
    String updateSql = "UPDATE " + tablename + " SET rep = ? WHERE seq = ?";

    // try-with-resources closes the analyzer, connection and statements even
    // when classification fails part-way; the original leaked all of them on
    // any exception.
    try (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
            Connection c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni",
                    "postgres", "dbwpsdkdl")) {
        c.setAutoCommit(false);
        System.out.println("Opened database successfully");
        try (Statement stmt = c.createStatement();
                PreparedStatement update = c.prepareStatement(updateSql);
                ResultSet rs = stmt.executeQuery(selectSql)) {
            while (rs.next()) {
                String id = rs.getString("seq");
                String message = rs.getString("body");

                // Bag of dictionary words in this document with their frequencies.
                Multiset<String> words = ConcurrentHashMultiset.create();

                int wordCount = 0;
                // extract words from the document body; try-with-resources
                // guarantees the token stream is closed even if tokenization throws
                try (TokenStream ts = analyzer.tokenStream("text", new StringReader(message))) {
                    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                    ts.reset();
                    while (ts.incrementToken()) {
                        if (termAtt.length() > 0) {
                            String word = termAtt.toString();
                            // if the word is not in the dictionary, skip it
                            if (dictionary.containsKey(word)) {
                                words.add(word);
                                wordCount++;
                            }
                        }
                    }
                    ts.end();
                }

                // create vector wordId => weight using tf-idf
                // NOTE(review): 10000 is a magic cardinality that must be at
                // least the dictionary size — confirm against the training run.
                Vector vector = new RandomAccessSparseVector(10000);
                TFIDF tfidf = new TFIDF();
                for (Multiset.Entry<String> entry : words.entrySet()) {
                    String word = entry.getElement();
                    int count = entry.getCount();
                    Integer wordId = dictionary.get(word);
                    Long freq = documentFrequency.get(wordId);
                    double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
                    vector.setQuick(wordId, tfIdfValue);
                }

                // The classifier yields one score per label; the label with the
                // highest score is the predicted category.
                Vector resultVector = classifier.classifyFull(vector);
                double bestScore = -Double.MAX_VALUE;
                int bestCategoryId = -1;
                for (Element element : resultVector.all()) {
                    int categoryId = element.index();
                    double score = element.get();
                    if (score > bestScore) {
                        bestScore = score;
                        bestCategoryId = categoryId;
                    }
                }

                update.setString(1, labels.get(bestCategoryId));
                // seq was concatenated unquoted in the original SQL, so it is
                // assumed to be a numeric column — TODO confirm the column type.
                update.setLong(2, Long.parseLong(id));
                update.executeUpdate();
            }
            c.commit();
        }
    }
}

From source file:org.apache.ctakes.relationextractor.eval.PrintRelationStatistics.java

/**
 * Prints a histogram of how many entity mentions occur between the two
 * arguments of each gold-standard binary relation in a training corpus.
 * Counts are signed: negative when arg2 precedes arg1 in the text.
 *
 * @throws Exception if the corpus cannot be read or the CAS cannot be built
 */
public static void main(String[] args) throws Exception {
    Options options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    parser.parseArgument(args);

    CollectionReader reader = CollectionReaderFactory.createReader(XReader.class,
            FilesCollectionReader.PARAM_ROOT_FILE, options.trainDirectory.getPath());

    // signed mentions-between count => number of relations with that count
    Multiset<Integer> mentionsBetweenCounts = HashMultiset.create();
    JCas jCas = JCasFactory.createJCasFromPath("../ctakes-type-system/desc/common_type_system.xml");
    while (reader.hasNext()) {
        reader.getNext(jCas.getCas());
        JCas goldView = jCas.getView(GOLD_VIEW_NAME);
        for (BinaryTextRelation relation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
            Annotation arg1 = relation.getArg1().getArgument();
            Annotation arg2 = relation.getArg2().getArgument();
            int mentionsBetween;
            if (arg1.getBegin() < arg2.getBegin()) {
                mentionsBetween = JCasUtil
                        .selectCovered(goldView, EntityMention.class, arg1.getEnd(), arg2.getBegin()).size();
            } else {
                // arg2 comes first: report the count as negative
                mentionsBetween = -JCasUtil
                        .selectCovered(goldView, EntityMention.class, arg2.getEnd(), arg1.getBegin()).size();
            }
            mentionsBetweenCounts.add(mentionsBetween);
        }
    }

    // Print the histogram sorted by the signed mention count.
    List<Integer> mentionsBetweenKeys = new ArrayList<Integer>(mentionsBetweenCounts.elementSet());
    Collections.sort(mentionsBetweenKeys);
    for (Integer mentionsBetween : mentionsBetweenKeys) {
        // %n (platform line separator) instead of a hard-coded '\n'
        System.err.printf("%d x%d%n", mentionsBetween, mentionsBetweenCounts.count(mentionsBetween));
    }
}

From source file:com.music.tools.MidiAnalyzer.java

/**
 * Prints melodic statistics for part 1 of a MIDI score: per-part titles and
 * instruments, the pitch-interval histogram, melodic direction changes vs.
 * retentions, beat structure, and jMusic's built-in phrase statistics;
 * finally plays the part.
 *
 * The MIDI file may be supplied as the first command-line argument; when
 * absent, the original hard-coded path is used for backward compatibility.
 */
public static void main(String[] args) {
    String midiFile = args.length > 0 ? args[0]
            : "C:\\workspace\\music\\analysis\\midi\\jarre\\EQUINOX3.MID";
    Score score = new Score();
    Read.midi(score, midiFile);
    for (Part part : score.getPartArray()) {
        System.out.println(part.getTitle() + " : " + part.getInstrument());
    }
    Part part = score.getPart(1);

    System.out.println(part.getInstrument());
    part.setTempo(160);
    int previousPitch = 0;
    int prePreviousPitch = 0;
    System.out.println(score.getTimeSignature());
    // Histogram of pitch intervals between consecutive non-rest notes.
    Multiset<Integer> uniqueIntervals = HashMultiset.create();
    int directionChanges = 0;
    int directionRetentions = 0;

    LinkedList<Double> noteLengths = new LinkedList<>();
    for (Note note : part.getPhrase(0).getNoteArray()) {
        System.out.println(note.getPitch());
        if (!note.isRest()) {
            // A direction change is a sign flip between two consecutive
            // non-zero intervals; repeated pitches are ignored.
            if (prePreviousPitch != 0) {
                int previousDiff = previousPitch - prePreviousPitch;
                int diff = note.getPitch() - previousPitch;
                if (Math.signum(previousDiff) != Math.signum(diff) && diff != 0 && previousDiff != 0) {
                    directionChanges++;
                    System.out.println(prePreviousPitch + ":" + previousPitch + ":" + note.getPitch());
                } else if (diff != 0 && previousDiff != 0) {
                    directionRetentions++;
                }
            }
            if (note.getPitch() - previousPitch != 0) {
                prePreviousPitch = previousPitch;
            }

            uniqueIntervals.add(previousPitch - note.getPitch());
            previousPitch = note.getPitch();
        }
        noteLengths.add(note.getRhythmValue());
    }

    double normalizedBeatSize = 1d * score.getNumerator() * 4 / score.getDenominator();
    System.out.println("Beat size: " + normalizedBeatSize);
    double currentBeatSize = 0;
    int beats = 0;
    int beatsWithPerfectHalves = 0;
    // reverse, to avoid off-beats
    for (Iterator<Double> it = noteLengths.descendingIterator(); it.hasNext();) {
        // (stray empty statement after this line removed)
        currentBeatSize += it.next();
        // NOTE(review): any overshoot past the beat boundary is discarded by
        // resetting to 0 rather than carrying the remainder — confirm intended.
        if (currentBeatSize >= normalizedBeatSize) {
            currentBeatSize = 0;
            beats++;
        }
        if (currentBeatSize == normalizedBeatSize / 2) {
            beatsWithPerfectHalves++;
        }
    }

    System.out.println("Beats:beats with perfect halves -- " + beats + ":" + beatsWithPerfectHalves);

    Hashtable<String, Object> table = PhraseAnalysis.getAllStatistics(score.getPart(1).getPhrase(0), 1, 0,
            Scales.MAJOR_SCALE);
    for (Entry<String, Object> entry : table.entrySet()) {
        System.out.println(entry.getKey() + "=" + entry.getValue());
    }
    for (Integer interval : uniqueIntervals.elementSet()) {
        System.out.println(interval + " : " + uniqueIntervals.count(interval));
    }

    System.out.println("---");

    System.out.println(directionChanges + " : " + directionRetentions);
    Play.midi(part);
}

From source file:edu.mit.streamjit.test.StreamFuzzer.java

/**
 * Endlessly generates random stream graphs and checks that the optimizing
 * compiler produces the same output as the interpreter, stopping (and
 * writing a regression test) at the first mismatch, then printing run
 * statistics.
 *
 * @throws InterruptedException if a test run is interrupted
 * @throws IOException if the regression test cannot be written
 */
public static void main(String[] args) throws InterruptedException, IOException {
    StreamCompiler debugSC = new InterpreterStreamCompiler();
    StreamCompiler compilerSC = new Compiler2StreamCompiler();
    Set<FuzzElement> completedCases = new HashSet<>();
    int generated;
    int duplicatesSkipped = 0;
    // How many cases were skipped per known-ignorable exception type.
    Multiset<Class<?>> ignored = HashMultiset.create(ignoredExceptions.size());
    int failures = 0, successes = 0;
    next_case: for (generated = 0; true; ++generated) {
        FuzzElement fuzz = StreamFuzzer.generate();
        // Set.add returns false for a case we have already run.
        if (!completedCases.add(fuzz)) {
            ++duplicatesSkipped;
            continue;
        }

        try {
            fuzz.instantiate().visit(new CheckVisitor());
        } catch (IllegalStreamGraphException ex) {
            // The fuzzer itself produced an invalid graph; report and carry on.
            System.out.println("Fuzzer generated bad test case");
            ex.printStackTrace(System.out);
            fuzz.instantiate().visit(new PrintStreamVisitor(System.out));
        }

        List<Integer> debugOutput = run(fuzz, debugSC);
        List<Integer> compilerOutput = null;
        try {
            compilerOutput = run(fuzz, compilerSC);
        } catch (Throwable ex) {
            // Skip the whole case if any cause in the chain is ignorable.
            for (Throwable t : Throwables.getCausalChain(ex))
                if (ignoredExceptions.contains(t.getClass())) {
                    ignored.add(t.getClass());
                    continue next_case;
                }
            System.out.println("Compiler failed");
            ex.printStackTrace(System.out);
            // compilerOutput stays null, so the mismatch branch below fires.
        }
        if (!debugOutput.equals(compilerOutput)) {
            ++failures;
            fuzz.instantiate().visit(new PrintStreamVisitor(System.out));
            System.out.println(fuzz.toJava());
            //TODO: show only elements where they differ
            System.out.println("Debug output: " + debugOutput);
            System.out.println("Compiler output: " + compilerOutput);
            writeRegressionTest(fuzz);
            break;
        } else
            ++successes;
        System.out.println(fuzz.hashCode() + " matched");
    }

    // NOTE(review): when the loop exits via break, `generated` has not been
    // incremented for the failing case, so the statistics below undercount by
    // one (and divide by zero in floating point if the very first case fails)
    // — confirm intended.
    System.out.format("Generated %d cases%n", generated);
    System.out.format("  skipped %d duplicates (%f%%)%n", duplicatesSkipped,
            ((double) duplicatesSkipped) * 100 / generated);
    for (Class<?> c : ignoredExceptions) {
        int count = ignored.count(c);
        if (count > 0)
            System.out.format("  ignored %d due to %s (%f%%)%n", count, c, ((double) count) * 100 / generated);
    }
    System.out.format("Ran %d cases (%f%% run rate)%n", successes + failures,
            ((double) successes + failures) * 100 / generated);
    System.out.format("  %d succeeded (%f%%)%n", successes,
            ((double) successes) * 100 / (successes + failures));
    System.out.format("  %d failed (%f%%)%n", failures, ((double) failures) * 100 / (successes + failures));
}

From source file:org.trnltk.experiment.morphology.ambiguity.AmbiguityClassifier.java

/**
 * Diffs every pair of morphological parse results for each word in a corpus
 * split, counts the pairwise differences, and prints the most frequent
 * differences (count > 100) that are not caused by differing roots.
 *
 * @throws IOException   if a split file cannot be read
 * @throws JSONException if a parse-result entry cannot be decoded
 */
public static void main(String[] args) throws IOException, JSONException {
    int numberOfWords = 0;
    int numberOfParseResults = 0;
    final Multiset<ParseResultDifference> differenceSet = HashMultiset.create();
    final Multiset<ParseResultDifference> differenceSetWithoutRootDifferences = HashMultiset.create();

    // Hard-coded corpus location; adjust before running on another machine.
    final File folder = new File("D:\\devl\\data\\1MSentences\\split");

    final File[] files = folder.listFiles();
    if (files == null)
        throw new RuntimeException();

    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    for (int fileIndex = 0; fileIndex < files.length; fileIndex++) {
        File file = files[fileIndex];
        System.out.println("Processing file " + file);
        final ParseResultReader parseResultReader = new ParseResultReader();
        final ParseResultDiffTool parseResultDiffTool = new ParseResultDiffTool();

        final List<WordParseResultEntry> parseResultEntries = parseResultReader
                .getParseResultEntries(Files.newReader(file, Charsets.UTF_8));
        numberOfWords += parseResultEntries.size();
        //noinspection ForLoopReplaceableByForEach
        for (int parseResultEntryIndex = 0; parseResultEntryIndex < parseResultEntries
                .size(); parseResultEntryIndex++) {
            WordParseResultEntry parseResultEntry = parseResultEntries.get(parseResultEntryIndex);
            final List<ParseResult> parseResults = parseResultEntry.getParseResults();
            numberOfParseResults += parseResults.size();
            // Diff every unordered pair of parse results for this word.
            for (int i = 0; i < parseResults.size(); i++) {
                final ParseResult leftParseResult = parseResults.get(i);
                for (int j = i + 1; j < parseResults.size(); j++) {
                    final ParseResult rightParseResult = parseResults.get(j);

                    final ParseResultDifference difference = parseResultDiffTool.findDifference(leftParseResult,
                            rightParseResult);
                    // NOTE(review): Guava's Multiset.add(E) always returns
                    // true, so `added` never filters anything here — if
                    // first-occurrence-only semantics were intended this is a
                    // bug, but "fixing" it would zero out the count>100
                    // report below. Confirm intent before changing.
                    final boolean added = differenceSet.add(difference);
                    if (added && difference.hasNoRootDifference() && difference.hasPartDifference())
                        differenceSetWithoutRootDifferences.add(difference);
                }
            }
        }
        // Deliberately stop after the first file (debugging aid).
        //noinspection ConstantConditions
        if (fileIndex == 0)
            break;
    }

    stopWatch.stop();
    final long time = stopWatch.getTime();
    System.out.println(stopWatch);
    // NOTE(review): 51 is a magic divisor (presumably a file count from an
    // earlier run) — confirm before trusting this per-file average.
    System.out.println(Long.valueOf(time).doubleValue() / (51));

    System.out.println("Number of words : " + numberOfWords);
    System.out.println("Number of parseResults : " + numberOfParseResults);
    System.out.println("Number of distinct differences : " + differenceSet.elementSet().size());
    System.out.println("numberOfDistinctDifferencesWithoutRootDifference : "
            + differenceSetWithoutRootDifferences.elementSet().size());

    // Report the most frequent non-root differences, highest count first.
    final ImmutableMultiset<ParseResultDifference> sortedDifferenceSetWithoutRootDifferences = Multisets
            .copyHighestCountFirst(differenceSetWithoutRootDifferences);
    for (ParseResultDifference parseResultDifference : sortedDifferenceSetWithoutRootDifferences.elementSet()) {
        final int count = sortedDifferenceSetWithoutRootDifferences.count(parseResultDifference);
        if (count > 100) {
            System.out.println(count);
            System.out.println(parseResultDifference);
        }
    }

}

From source file:codex.extract.ZipUtils.java

/**
 * Counts the file-name suffixes of all non-directory entries in a zip file.
 *
 * @param file the zip file to summarize
 * @return a multiset mapping each suffix to its number of occurrences
 */
public static Multiset<String> summarizeSources(ZipFile file) {
    Multiset<String> suffixCounts = HashMultiset.create();
    file.stream()
            .filter(entry -> !entry.isDirectory())
            .forEach(entry -> suffixCounts.add(suffix(entry.getName())));
    return suffixCounts;
}

From source file:org.sonar.graph.FeedbackCycle.java

/**
 * Collects every edge of every cycle into a single bag, so an edge shared by
 * several cycles is counted once per cycle it appears in.
 *
 * @param cycles the cycles whose edges are gathered
 * @return a multiset of all edges with their cycle-membership counts
 */
private static Multiset<Edge> createBagWithAllEdgesOfCycles(Set<Cycle> cycles) {
    Multiset<Edge> allEdges = HashMultiset.create();
    for (Cycle cycle : cycles) {
        allEdges.addAll(cycle.getEdges());
    }
    return allEdges;
}

From source file:org.apache.drill.plan.ParsePlan.java

/**
 * Validates a plan's dataflow: every output slot must be assigned exactly
 * once and every symbolic input must refer to a slot that is assigned
 * somewhere. All violations are collected before a single exception is thrown.
 *
 * @param r the parsed plan to check
 * @throws ValidationException listing every violation found
 */
private static void validate(Plan r) throws ValidationException {
    int errorCount = 0;
    Formatter messages = new Formatter();

    // Pass 1: count assignments per output slot; flag every reassignment.
    Multiset<Integer> assignmentCounts = HashMultiset.create();
    int statementNumber = 1;
    for (Op op : r.getStatements()) {
        for (Arg output : op.getOutputs()) {
            int slot = ((Arg.Symbol) output).getSlot();
            assignmentCounts.add(slot);
            if (assignmentCounts.count(slot) != 1) {
                messages.format("Output symbol %%%d used more than once in statement %d\n", slot,
                        statementNumber);
                errorCount++;
            }
        }
        statementNumber++;
    }

    // Pass 2: every symbolic input must reference an assigned slot.
    statementNumber = 1;
    for (Op op : r.getStatements()) {
        for (Arg input : op.getInputs()) {
            if (input instanceof Arg.Symbol) {
                int slot = ((Arg.Symbol) input).getSlot();
                if (assignmentCounts.count(slot) <= 0) {
                    messages.format("Undefined reference to %%%d in statement %d\n", slot, statementNumber);
                    errorCount++;
                }
            }
        }
        statementNumber++;
    }

    if (errorCount > 0) {
        throw new ValidationException(messages.toString());
    }
}

From source file:org.apache.mahout.knn.tools.Vectorize20NewsGroups.java

/**
 * Tokenizes a 20-newsgroups message file into a word multiset.
 *
 * Lines up to the first blank line are treated as headers; only headers whose
 * lowercased name is in {@code legalHeaders} contribute words. Body lines
 * that look like quotations are skipped unless {@code includeQuotes} is set.
 *
 * @param f the message file to read (UTF-8)
 * @return a multiset of lowercased tokens with their occurrence counts
 * @throws IOException if the file cannot be read
 */
static Multiset<String> parse(File f) throws IOException {
    return Files.readLines(f, Charsets.UTF_8, new LineProcessor<Multiset<String>>() {
        // True while we are still inside the header section.
        private boolean readingHeaders = true;
        // Splits "Name: value" into at most two parts.
        private Splitter header = Splitter.on(":").limit(2);
        // Splits on any character that is not part of a word token
        // (letters, digits, '.', '/' and ':' are kept).
        private Splitter words = Splitter.on(CharMatcher.forPredicate(new Predicate<Character>() {
            @Override
            public boolean apply(Character ch) {
                return !Character.isLetterOrDigit(ch) && ch != '.' && ch != '/' && ch != ':';
            }
        })).omitEmptyStrings().trimResults();

        // Matches common quotation/attribution line shapes.
        private Pattern quotedLine = Pattern.compile("(^In article .*)|(^> .*)|(.*writes:$)|(^\\|>)");

        // Accumulated token counts, returned by getResult().
        private Multiset<String> counts = HashMultiset.create();

        @Override
        public boolean processLine(String line) throws IOException {
            // The first empty line ends the header section.
            if (readingHeaders && line.length() == 0) {
                readingHeaders = false;
            }

            if (readingHeaders) {
                Iterator<String> i = header.split(line).iterator();
                String head = i.next().toLowerCase();
                if (legalHeaders.contains(head)) {
                    // Only the header's value (after the colon) is tokenized.
                    addText(counts, i.next());
                }
            } else {
                boolean quote = quotedLine.matcher(line).matches();
                if (includeQuotes || !quote) {
                    addText(counts, line);
                }
            }
            return true;
        }

        @Override
        public Multiset<String> getResult() {
            return counts;
        }

        // Adds each lowercased token of the line to the multiset.
        private void addText(Multiset<String> v, String line) {
            for (String word : words.split(line)) {
                v.add(word.toLowerCase());
            }
        }
    });
}

From source file:org.caleydo.view.domino.internal.NodeSelections.java

/**
 * Returns the blocks that are fully covered by the selection, i.e. blocks for
 * which every node (and every group of those nodes) is selected.
 *
 * @param selection the selected node groups
 * @return the set of fully selected blocks, possibly empty
 */
public static Set<Block> getFullBlocks(Set<NodeGroup> selection) {
    if (selection.isEmpty())
        return Collections.emptySet();
    Set<Node> fullNodes = getFullNodes(selection);
    if (fullNodes.isEmpty())
        return Collections.emptySet();
    // Count how many fully selected nodes each block contributes.
    Multiset<Block> nodesPerBlock = HashMultiset.create();
    for (Node node : fullNodes) {
        nodesPerBlock.add(node.getBlock());
    }
    // Drop blocks where not every node is fully selected; removing from the
    // element-set view removes all occurrences, like the original iterator.
    nodesPerBlock.elementSet().removeIf(block -> block.nodeCount() != nodesPerBlock.count(block));
    return nodesPerBlock.elementSet();
}