Example usage for com.google.common.collect Multiset elementSet

Introduction

This page collects example usages of com.google.common.collect.Multiset.elementSet().

Prototype

Set<E> elementSet();

Document

Returns the set of distinct elements contained in this multiset.
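
Before the project examples below, here is a minimal self-contained sketch of the pattern most of them follow: iterate elementSet() to visit each distinct element once, then call count(element) for its multiplicity. The class name ElementSetDemo and the sample words are made up for illustration.

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class ElementSetDemo {
    public static void main(String[] args) {
        Multiset<String> words = HashMultiset.create();
        words.add("apple");
        words.add("banana");
        words.add("apple");

        // elementSet() returns each distinct element exactly once,
        // no matter how many times it was added to the multiset.
        for (String word : words.elementSet()) {
            System.out.println(word + ": " + words.count(word));
        }
        // Prints, in an unspecified order:
        // apple: 2
        // banana: 1
    }
}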

Usage

From source file:com.b2international.snowowl.snomed.reasoner.server.diff.SourceConceptNamespaceAndModuleAssigner.java

@Override
public void allocateRelationshipIdsAndModules(Multiset<String> conceptIds,
        final SnomedEditingContext editingContext) {
    Multiset<String> reservedIdsByNamespace = HashMultiset.create();
    for (Multiset.Entry<String> conceptIdWithCount : conceptIds.entrySet()) {
        String namespace = SnomedIdentifiers.getNamespace(conceptIdWithCount.getElement());
        reservedIdsByNamespace.add(namespace, conceptIdWithCount.getCount());
    }

    ISnomedIdentifierService identifierService = getServiceForClass(ISnomedIdentifierService.class);
    for (Multiset.Entry<String> namespaceWithCount : reservedIdsByNamespace.entrySet()) {
        Collection<String> reservedIds = identifierService.reserve(namespaceWithCount.getElement(),
                ComponentCategory.RELATIONSHIP, namespaceWithCount.getCount());
        this.reservedIds.addAll(reservedIds);
        namespaceToRelationshipIdMap.put(namespaceWithCount.getElement(), reservedIds.iterator());
    }

    for (String conceptId : conceptIds.elementSet()) {
        Concept concept = editingContext.lookup(conceptId, Concept.class);
        conceptIdToRelationshipModuleMap.put(conceptId, concept.getModule());
    }
}

From source file:com.cloudera.knittingboar.records.RCV1RecordFactory.java

public static void ScanFile(String file, int debug_break_cnt) throws IOException {

    ConstantValueEncoder encoder_test = new ConstantValueEncoder("test");

    BufferedReader reader = null;
    // Collection<String> words
    int line_count = 0;

    Multiset<String> class_count = ConcurrentHashMultiset.create();
    Multiset<String> namespaces = ConcurrentHashMultiset.create();

    try {
        // System.out.println( newsgroup );
        reader = new BufferedReader(new FileReader(file));

        String line = reader.readLine();

        while (line != null && line.length() > 0) {

            // shard_writer.write(line + "\n");
            // out += line;

            String[] parts = line.split(" ");

            // System.out.println( "Class: " + parts[0] );

            class_count.add(parts[0]);
            namespaces.add(parts[1]);

            line = reader.readLine();
            line_count++;

            Vector v = new RandomAccessSparseVector(FEATURES);

            for (int x = 2; x < parts.length; x++) {
                // encoder_test.addToVector(parts[x], v);
                // System.out.println( parts[x] );
                String[] feature = parts[x].split(":");
                int index = Integer.parseInt(feature[0]) % FEATURES;
                double val = Double.parseDouble(feature[1]);

                // System.out.println( feature[1] + " = " + val );

                if (index < FEATURES) {
                    v.set(index, val);
                } else {

                    System.out.println("Index out of range; would hash " + index + " to " + (index % FEATURES));

                }

            }

            Utils.PrintVectorSectionNonZero(v, 10);
            System.out.println("###");

            if (line_count > debug_break_cnt) {
                break;
            }

        }

        System.out.println("Total Rec Count: " + line_count);

        System.out.println("-------------------- ");

        System.out.println("Classes");
        for (String word : class_count.elementSet()) {
            System.out.println("Class " + word + ": " + class_count.count(word) + " ");
        }

        System.out.println("-------------------- ");

        System.out.println("NameSpaces:");
        for (String word : namespaces.elementSet()) {
            System.out.println("Namespace " + word + ": " + namespaces.count(word) + " ");
        }

        /*
         * TokenStream ts = analyzer.tokenStream("text", reader);
         * ts.addAttribute(CharTermAttribute.class);
         * 
         * // for each word in the stream, minus non-word stuff, add word to
         * collection while (ts.incrementToken()) { String s =
         * ts.getAttribute(CharTermAttribute.class).toString();
         * //System.out.print( " " + s ); //words.add(s); out += s + " "; }
         */

    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    // return out + "\n";

}

From source file:com.github.pffy.chinese.freq.ImmutableChineseFrequency.java

/**
 * Builds this object with an input text. Expecting Chinese characters.
 *
 * @param input Chinese text for frequency analysis
 */
public ImmutableChineseFrequency(String input) {

    init();

    // Counts
    int inputCount, removedCount, hanziCount, uniqueHanziCount, processedCount;

    Scanner sc;

    // Output data
    String csvOutput = "";
    String tsvOutput = "";
    String txtOutput = "";

    String csv, tsv, txt;
    String summaryString = "";

    String hz, py;
    int freq;

    // Google Guava magic
    String hanzi;
    Multiset<String> hanziSet = HashMultiset.create();

    Iterable<Multiset.Entry<String>> hanziSortedByCount;
    Iterator<Multiset.Entry<String>> keys;
    Multiset.Entry<String> key;

    ImmutableList<Multiset.Entry<String>> hanziList;

    if (input == null || input.isEmpty()) {
        throw new NullPointerException(this.MSG_EMPTYNULL_STRING);
    }

    inputCount = input.length();

    input = retainHanzi(input);
    removedCount = inputCount - input.length();

    hanziCount = input.length();

    sc = new Scanner(input);
    sc.useDelimiter("");

    // accumulate: counts occurrences
    while (sc.hasNext()) {
        hanzi = (String) sc.next();
        hanziSet.add(hanzi, 1);
    }

    sc.close();

    uniqueHanziCount = hanziSet.elementSet().size();
    processedCount = 0;

    hanziSortedByCount = Multisets.copyHighestCountFirst(hanziSet).entrySet();
    hanziList = Multisets.copyHighestCountFirst(hanziSet).entrySet().asList();
    keys = hanziSortedByCount.iterator();

    while (keys.hasNext()) {

        key = (Multiset.Entry<String>) keys.next();

        hz = (String) key.getElement().replaceAll("x \\d{1,}", "");
        py = (String) this.hpdx.get(hz);
        freq = (int) key.getCount();

        // check null first to avoid NullPointerException. lazy code.
        if (py == null || py.isEmpty()) {
            // not mapped yet. that is okay move on.
            continue;
        }

        csv = this.CRLF + hz + "," + py + "," + freq;
        csvOutput += csv;

        tsv = this.CRLF + hz + "\t" + py + "\t" + freq;
        tsvOutput += tsv;

        txt = this.CRLF + padSummary(hz + " [" + py + "]", this.PADSIZE_SUMMARY) + freq;
        txtOutput += txt;

        processedCount++;
    }

    summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount;

    summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount;
    summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount;

    if (processedCount > 0) {

        csvOutput = this.HEADER_ROW_CSV + csvOutput;
        tsvOutput = this.HEADER_ROW_TSV + tsvOutput;
        txtOutput = this.HEADER_ROW_TXT + txtOutput;

    }

    this.input = input;
    this.inputCount = inputCount;
    this.removedCount = removedCount;
    this.hanziCount = hanziCount;
    this.uniqueHanziCount = uniqueHanziCount;
    this.processedCount = processedCount;
    this.summary = summaryString;
    this.hanziList = hanziList;

    this.csvOutput = csvOutput;
    this.tsvOutput = tsvOutput;
    this.txtOutput = txtOutput;
}

From source file:org.eclipse.sirius.ui.debug.SiriusDebugView.java

private void addShowPayloadAccessLogAction() {
    addAction("Show Payload Access Log", new Runnable() {
        @Override
        public void run() {
            int max = 50;
            List<FeatureAccess> log = PayloadMarkerAdapter.INSTANCE.getAccessLog();
            int totalSize = log.size();
            int shown = Math.min(totalSize, max);
            TabularReport tr = new TabularReport(/* "Timestamp", */"EObject", "Feature");

            try {
                PayloadMarkerAdapter.INSTANCE.setEnable(false);
                for (int i = log.size() > max ? log.size() - max : 0; i < log.size(); i++) {
                    FeatureAccess featureAccess = log.get(i);
                    tr.addLine(Arrays.asList(/*
                                              * String.format("%tT",
                                              * featureAccess.timestamp),
                                              */((Component) featureAccess.setting.getEObject()).getName(),
                            featureAccess.setting.getEStructuralFeature().getName()));
                }
            } finally {
                PayloadMarkerAdapter.INSTANCE.setEnable(true);
            }
            StringBuilder sb = new StringBuilder();
            sb.append("Showing " + shown + " of " + totalSize + " accesses.\n");
            Multiset<String> contexts = PayloadMarkerAdapter.INSTANCE.getUniqueContexts();
            sb.append("Unique contexts: " + contexts.elementSet().size()).append("\n\n");

            int i = 0;
            for (String stack : contexts.elementSet()) {
                int count = contexts.count(stack);
                sb.append("Context #" + i++ + " (" + count + " occurrences)").append("\n");
                sb.append(stack).append("\n");
            }

            sb.append("\n").append(tr.print()).append("\n");
            setText(sb.toString());
        }
    });
}

From source file:fabric.worker.transaction.DeadlockDetectorThread.java

/**
 * Resolves deadlocks by aborting transactions.
 *
 * @param cycles
 *          the set of deadlocks, represented by the logs of transactions
 *          involved in waits-for cycles.
 */
private void resolveDeadlocks(Set<Set<Log>> cycles) {
    // Turn the set of cycles into a map from top-level TIDs to sorted multisets
    // of transaction logs. The multisets are sorted by transaction depth, outer
    // transactions first.
    LongKeyMap<Multiset<Log>> logsByTopLevelTid = new LongKeyHashMap<Multiset<Log>>();
    for (Set<Log> cycle : cycles) {
        for (Log log : cycle) {
            long topLevelTid = log.getTid().topTid;
            Multiset<Log> logs = logsByTopLevelTid.get(topLevelTid);
            if (logs == null) {
                logs = TreeMultiset.create(LOG_COMPARATOR);
                logsByTopLevelTid.put(topLevelTid, logs);
            }

            logs.add(log);
        }
    }

    // Abort transactions to break up cycles. Transactions involved in more
    // cycles are aborted first.
    while (!cycles.isEmpty()) {
        // Figure out which top-level transaction(s) is involved in the most number
        // of deadlocks.
        int curMax = 0;
        LongSet abortCandidates = new LongHashSet();
        for (LongKeyMap.Entry<Multiset<Log>> entry : logsByTopLevelTid.entrySet()) {
            int curSize = entry.getValue().size();
            if (curMax > curSize)
                continue;

            if (curMax < curSize) {
                curMax = curSize;
                abortCandidates.clear();
            }

            abortCandidates.add(entry.getKey());
        }

        // Figure out which transaction to abort. (Pick the newest one.)
        Log toAbort = null;
        Multiset<Log> abortSet = null;
        for (LongIterator it = abortCandidates.iterator(); it.hasNext();) {
            long curTopLevelTid = it.next();
            Multiset<Log> curCandidateSet = logsByTopLevelTid.get(curTopLevelTid);
            Log curCandidate = curCandidateSet.iterator().next();

            if (toAbort == null || toAbort.startTime < curCandidate.startTime) {
                toAbort = curCandidate;
                abortSet = curCandidateSet;
            }
        }

        // Abort the transaction.
        WORKER_DEADLOCK_LOGGER.log(Level.FINE, "Aborting {0}", toAbort);
        toAbort.flagRetry();

        // Fix up our data structures to reflect the aborted transaction.
        for (Iterator<Set<Log>> cycleIt = cycles.iterator(); cycleIt.hasNext();) {
            Set<Log> cycle = cycleIt.next();

            // Check if the cycle has a transaction that was aborted.
            if (!haveCommonElements(cycle, abortSet.elementSet()))
                continue;

            // Cycle was broken, so remove from the set of cycles.
            cycleIt.remove();

            // Fix up logsByTopLevelTid.
            for (Log log : cycle) {
                long topLevelTid = log.getTid().topTid;
                Multiset<Log> logs = logsByTopLevelTid.get(topLevelTid);
                logs.remove(log);
                if (logs.isEmpty()) {
                    logsByTopLevelTid.remove(topLevelTid);
                }
            }
        }
    }
}

From source file:org.bridgedb.tools.qc.PatternChecker.java

public void run(File f) throws SQLException, IDMapperException {
    String database = "" + f;
    //TODO: we can use the new Iterator interface here...
    DBConnector con = new DataDerby();
    Connection sqlcon = null;
    sqlcon = con.createConnection(database, 0);

    Multimap<DataSource, String> missExamples = HashMultimap.create();
    Multiset<DataSource> misses = HashMultiset.create();
    Multiset<DataSource> totals = HashMultiset.create();
    Map<DataSource, Pattern> patterns = DataSourcePatterns.getPatterns();

    //      String url = "jdbc:derby:jar:(" + f + ")database";
    //      IDMapperRdb gdb = SimpleGdbFactory.createInstance("" + f, url);

    Statement st = sqlcon.createStatement();
    ResultSet rs = st.executeQuery("select id, code from datanode");

    while (rs.next()) {
        String id = rs.getString(1);
        String syscode = rs.getString(2);
        if (DataSource.systemCodeExists(syscode)) {
            DataSource ds = DataSource.getExistingBySystemCode(syscode);
            if (patterns.get(ds) == null)
                continue; // skip if there is no pattern defined.

            Set<DataSource> matches = DataSourcePatterns.getDataSourceMatches(id);
            if (!matches.contains(ds)) {
                if (missExamples.get(ds).size() < 10)
                    missExamples.put(ds, id);
                misses.add(ds);
            }
            totals.add(ds);
        }
    }

    //         String code = rs.getString (2);
    //System.out.println (id + "\t" + code);

    for (DataSource ds : totals.elementSet()) {
        int miss = misses.count(ds);
        int total = totals.count(ds);

        if (miss > 0) {
            String severity = miss < (total / 25) ? "WARNING" : "ERROR";
            System.out.println(severity + ": " + miss + "/" + total + " (" + miss * 100 / total
                    + "%) ids do not match expected pattern for " + ds);
            System.out.println(severity + ": expected pattern is '" + patterns.get(ds) + "'");
            boolean first = true;
            for (String id : missExamples.get(ds)) {
                System.out.print(first ? severity + ": aberrant ids are e.g. " : ", ");
                first = false;
                System.out.print("'" + id + "'");
            }
            System.out.println();
        }
    }

    allMisses.addAll(misses);
    allTotals.addAll(totals);
}

From source file:org.apache.mahout.classifier.sgd.NewsgroupHelper.java

Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts)
        throws IOException {
    long date = (long) (1000 * (DATE_REFERENCE + actual * MONTH + 1 * WEEK * rand.nextDouble()));
    Multiset<String> words = ConcurrentHashMultiset.create();

    BufferedReader reader = Files.newReader(file, Charsets.UTF_8);
    try {
        String line = reader.readLine();
        Reader dateString = new StringReader(DATE_FORMATS[leakType % 3].format(new Date(date)));
        countWords(analyzer, words, dateString, overallCounts);
        while (line != null && !line.isEmpty()) {
            boolean countHeader = (line.startsWith("From:") || line.startsWith("Subject:")
                    || line.startsWith("Keywords:") || line.startsWith("Summary:")) && leakType < 6;
            do {
                Reader in = new StringReader(line);
                if (countHeader) {
                    countWords(analyzer, words, in, overallCounts);
                }
                line = reader.readLine();
            } while (line != null && line.startsWith(" "));
        }
        if (leakType < 3) {
            countWords(analyzer, words, reader, overallCounts);
        }
    } finally {
        Closeables.closeQuietly(reader);
    }

    Vector v = new RandomAccessSparseVector(FEATURES);
    bias.addToVector("", 1, v);
    for (String word : words.elementSet()) {
        encoder.addToVector(word, Math.log1p(words.count(word)), v);
    }

    return v;
}

From source file:org.apache.mahout.classifier.NewsgroupHelper.java

public Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts)
        throws IOException {
    long date = (long) (1000 * (DATE_REFERENCE + actual * MONTH + 1 * WEEK * rand.nextDouble()));
    Multiset<String> words = ConcurrentHashMultiset.create();

    BufferedReader reader = Files.newReader(file, Charsets.UTF_8);
    try {
        String line = reader.readLine();
        Reader dateString = new StringReader(DATE_FORMATS[leakType % 3].format(new Date(date)));
        countWords(analyzer, words, dateString, overallCounts);
        while (line != null && !line.isEmpty()) {
            boolean countHeader = (line.startsWith("From:") || line.startsWith("Subject:")
                    || line.startsWith("Keywords:") || line.startsWith("Summary:")) && leakType < 6;
            do {
                Reader in = new StringReader(line);
                if (countHeader) {
                    countWords(analyzer, words, in, overallCounts);
                }
                line = reader.readLine();
            } while (line != null && line.startsWith(" "));
        }
        if (leakType < 3) {
            countWords(analyzer, words, reader, overallCounts);
        }
    } finally {
        Closeables.close(reader, true);
    }

    Vector v = new RandomAccessSparseVector(FEATURES);
    bias.addToVector("", 1, v);
    for (String word : words.elementSet()) {
        encoder.addToVector(word, Math.log1p(words.count(word)), v);
    }

    return v;
}

From source file:org.eclipse.incquery.runtime.base.core.NavigationHelperImpl.java

@Override
public Set<EObject> getHoldersOfFeature(EStructuralFeature _feature) {
    Object feature = toKey(_feature);
    Multiset<EObject> holders = contentAdapter.getFeatureToHolderMap().get(feature);
    if (holders == null) {
        return Collections.emptySet();
    } else {
        return Collections.unmodifiableSet(holders.elementSet());
    }
}

From source file:BibTex.IOmethods.java

public void writeConnectedCategories(Set<BibTexRef> refs) throws IOException {

    BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "connected categories.csv"));
    StringBuilder sb = new StringBuilder();
    int maxCountCategory = 0;
    sb.append("Source,Target,Type,Weight").append("\n");

    //creation of convenient data structures for I/O
    Multiset<Edge> edges = HashMultiset.create();
    Multiset<String> multisetCategoryNames = HashMultiset.create();

    for (BibTexRef ref : refs) {
        Set<Category> categories = ref.getCategories();
        Set<String> categoriesNames = new HashSet<>();

        for (Category category : categories) {
            categoriesNames.add(category.getCategoryName());
            multisetCategoryNames.add(category.getCategoryName());
        }

        FindAllPairs findAllPairs = new FindAllPairs();
        List<Pair<String>> pairs = findAllPairs.getAllUndirectedPairsAsList(categoriesNames);

        for (Pair<String> pair : pairs) {
            Edge edge = new Edge();
            edge.setNode1(pair.getLeft());
            edge.setNode2(pair.getRight());
            edges.add(edge);

        }

    }

    //finding the max number for a category, for normalization purposes
    for (String string : multisetCategoryNames.elementSet()) {
        if (maxCountCategory < multisetCategoryNames.count(string)) {
            maxCountCategory = multisetCategoryNames.count(string);
        }
    }

    //writing one csv line per edge: source, target, type and normalized weight.
    for (Edge edge : edges.elementSet()) {
        //we devalue the weight of an edge by how frequent the 2 nodes of the edge are.
        float weight = edges.count(edge) / (float) (multisetCategoryNames.count(edge.getNode1())
                * multisetCategoryNames.count(edge.getNode2()));
        //            float weight = edges.count(edge);
        //normalization to a 0 -> 10 scale to visualize the weight on Gephi
        weight = weight * 10 / (float) maxCountCategory * 100000;
        sb.append(edge.getNode1()).append(",").append(edge.getNode2()).append(",Undirected,").append(weight);
        sb.append("\n");
    }
    bw.write(sb.toString());
    bw.close();
}