Example usage for org.apache.commons.collections4 Bag uniqueSet

List of usage examples for org.apache.commons.collections4 Bag uniqueSet

Introduction

On this page you can find example usages of the uniqueSet method of org.apache.commons.collections4 Bag.

Prototype

Set<E> uniqueSet();

Source Link

Document

Returns a Set of unique elements in the Bag.

Usage

From source file:edu.gslis.ts.RunQuery.java

/**
 * Reads the stream items listed in {@code <input>/ids.txt}, tokenizes the clean-visible text of
 * each one on whitespace, and prints every distinct term followed by its total count.
 *
 * <p>Command-line options read here: {@code input} (base directory holding {@code ids.txt} and
 * the {@code .xz} item files), {@code events}, {@code stop} and {@code query}.
 *
 * <p>Any exception aborts the run and is printed to standard error; in that case no counts are
 * printed.
 *
 * @param args command-line arguments, parsed with the options from {@code createOptions()}
 */
public static void main(String[] args) {
    try {
        // Get the commandline options
        Options options = createOptions();
        CommandLineParser parser = new GnuParser();
        CommandLine cmd = parser.parse(options, args);

        String inputPath = cmd.getOptionValue("input");
        String eventsPath = cmd.getOptionValue("events");
        String stopPath = cmd.getOptionValue("stop");
        int queryId = Integer.parseInt(cmd.getOptionValue("query"));

        List<String> ids = FileUtils.readLines(new File(inputPath + File.separator + "ids.txt"));

        Stopper stopper = new Stopper(stopPath);
        Map<Integer, FeatureVector> queries = readEvents(eventsPath, stopper);

        // NOTE(review): the selected query is looked up but not used by the counting below.
        FeatureVector query = queries.get(queryId);

        Pairtree ptree = new Pairtree();
        Bag<String> words = new HashBag<String>();

        for (String streamId : ids) {

            // The item file lives at <input>/<pairtree path of the id without dashes>/<id>.xz
            String ppath = ptree.mapToPPath(streamId.replace("-", ""));
            String inpath = inputPath + File.separator + ppath + File.separator + streamId + ".xz";
            File infile = new File(inpath);

            InputStream fileIn = new FileInputStream(infile);
            TTransport inTransport = null;
            try {
                InputStream in = new XZInputStream(fileIn);
                inTransport = new TIOStreamTransport(new BufferedInputStream(in));
                TBinaryProtocol inProtocol = new TBinaryProtocol(inTransport);
                inTransport.open();
                final StreamItem item = new StreamItem();

                // Keep reading into the same item until the transport reports end of file.
                // NOTE(review): presumably each file holds a single item; with several, only the
                // state left after the last read is tokenized below. Confirm.
                while (true) {
                    try {
                        item.read(inProtocol);
                    } catch (TTransportException tte) {
                        // END_OF_FILE is used to indicate EOF and is not an exception.
                        if (tte.getType() != TTransportException.END_OF_FILE) {
                            tte.printStackTrace();
                        }
                        break;
                    }
                }

                // A document without clean-visible text has nothing to count; skip it rather
                // than letting a NullPointerException abort the whole run.
                if (item.getBody() == null || item.getBody().getClean_visible() == null) {
                    System.err.println("No clean_visible text for " + streamId + "; skipping");
                    continue;
                }
                String docText = item.getBody().getClean_visible();

                StringTokenizer itr = new StringTokenizer(docText);
                while (itr.hasMoreTokens()) {
                    words.add(itr.nextToken());
                }
            } finally {
                // Release the transport and the file handle even when reading or decoding fails.
                if (inTransport != null) {
                    inTransport.close();
                }
                fileIn.close();
            }
        }

        for (String term : words.uniqueSet()) {
            System.out.println(term + ":" + words.getCount(term));
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.frequency.phrasedetection.FrequencyCounter.java

/**
 * Writes one line per distinct token of a bag to an output stream, in the form
 * {@code token + COLUMN_SEPARATOR + count + "\n"}.
 * <p>
 * Tokens whose count is below {@code minCount} are left out. The remaining tokens are sorted
 * alphabetically when {@code sortByAlphabet} is set; otherwise by descending count when
 * {@code sortByCount} is set; otherwise they are written in the iteration order of the bag's
 * unique set.
 *
 * @param os      the {@link OutputStream} the lines are written to
 * @param counter the {@link Bag} holding the tokens and their counts
 * @throws RuntimeException wrapping the {@link IOException} if writing to the stream fails
 */
private void writeNgrams(OutputStream os, Bag<String> counter) {
    /* keep only the tokens that occur often enough */
    Stream<String> tokens = counter.uniqueSet().stream()
            .filter(candidate -> minCount <= counter.getCount(candidate));

    /* apply the configured ordering; alphabetical order wins over count order */
    if (sortByAlphabet) {
        tokens = tokens.sorted();
    } else if (sortByCount) {
        tokens = tokens.sorted(
                (left, right) -> Integer.compare(counter.getCount(right), counter.getCount(left)));
    }

    /* emit each token together with its count */
    tokens.forEach(entry -> {
        String line = entry + COLUMN_SEPARATOR + counter.getCount(entry) + "\n";
        try {
            os.write(line.getBytes());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
}

From source file:com.mikenimer.familydam.services.photos.MetadataKeywordsListServlet.java

/**
 * Writes a JSON array with one object per distinct keyword found on the {@code fd:image} nodes
 * below a repository path.
 *
 * <p>The path comes from the optional {@code path} request parameter and defaults to
 * {@code /content/dam}. For every matching node that has a {@code metadata} child with a
 * {@code keywords} property, the property is split on commas; each keyword is trimmed and
 * lower-cased before it is counted, and empty keywords are ignored. Each JSON object carries
 * {@code word}, {@code count} and {@code size} (a {@code rem} value clamped to 1.5..5).
 *
 * @param request  the Sling request, optionally carrying the {@code path} parameter
 * @param response the Sling response the JSON is written to
 * @throws SlingServletException if querying the repository or writing the JSON fails
 * @throws IOException           declared by the servlet contract
 */
@Override
protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse response)
        throws SlingServletException, IOException {
    String path = "/content/dam";

    if (request.getRequestParameter("path") != null) {
        path = request.getRequestParameter("path").getString();
    }

    // The path is request-supplied and ends up inside a bracketed name of the query, so a raw
    // "]" could close the bracket early and append arbitrary clauses. Doubling it is the escape
    // for bracketed names; a parser without that rule rejects the statement instead of running
    // the injected clauses.
    String escapedPath = path.replace("]", "]]");

    try {
        Bag<String> bag = new HashBag<String>();

        String stmt = "select * from [fd:image] WHERE ISDESCENDANTNODE([" + escapedPath + "])";

        Session session = request.getResourceResolver().adaptTo(Session.class);
        Query query = session.getWorkspace().getQueryManager().createQuery(stmt, Query.JCR_SQL2);
        QueryResult results = query.execute();

        NodeIterator nodeIterator = results.getNodes();
        while (nodeIterator.hasNext()) {
            Node n = nodeIterator.nextNode();
            if (n.hasNode("metadata")) {
                Node metaNode = n.getNode("metadata");
                if (metaNode.hasProperty("keywords")) {
                    String keywords = metaNode.getProperty("keywords").getString();
                    for (String key : keywords.split(",")) {
                        // Count on the normalized form so that "Tree", " tree" and "tree" end up
                        // as one entry instead of several entries that print the same word.
                        String normalized = key.trim().toLowerCase();
                        if (!normalized.isEmpty()) {
                            bag.add(normalized);
                        }
                    }
                }
            }
        }

        Set<String> set = bag.uniqueSet();

        final JSONWriter w = new JSONWriter(response.getWriter());
        w.setTidy(true);
        w.array();
        for (String word : set) {
            int count = bag.getCount(word);
            w.object();
            w.key("word").value(word);
            w.key("count").value(count);
            // Font size grows by 0.05rem per occurrence, clamped to the range 1.5rem..5rem.
            w.key("size").value(Math.max(1.5, Math.min(5, count * .05)) + "rem");

            // todo try this.
            // $size = min(max(round(( $size_max*($count-$count_min))/($count_max-$count_min),2), $size_min),$size_max);
            w.endObject();
        }
        w.endArray();

    } catch (Exception re) {
        re.printStackTrace();
        throw new SlingServletException(new javax.servlet.ServletException(re));
    }
}

From source file:uniol.apt.analysis.synthesize.separation.KBoundedSeparation.java

/**
 * Decides whether the given multiset of states is still worth exploring.
 *
 * @param r the candidate, as a bag mapping each state to its cardinality
 * @param k the largest cardinality a state may have
 * @return {@code false} if the bag contains every node of the transition system or if some
 *         state occurs more than {@code k} times; {@code true} otherwise
 */
private boolean shouldExplore(Bag<State> r, int k) {
    // If every state is present, no state has cardinality zero, so the result cannot be a
    // minimal region.
    boolean coversAllStates = r.containsAll(utility.getTransitionSystem().getNodes());
    if (coversAllStates) {
        return false;
    }

    // A single state above the bound k rules the candidate out.
    for (State candidate : r.uniqueSet()) {
        int cardinality = r.getCount(candidate);
        if (k < cardinality) {
            return false;
        }
    }

    return true;
}