Example usage for org.apache.commons.collections4 Bag getCount

List of usage examples for org.apache.commons.collections4 Bag getCount

Introduction

On this page you can find example usages of org.apache.commons.collections4 Bag getCount.

Prototype

int getCount(Object object);

Source Link

Document

Returns the number of occurrences (cardinality) of the given object currently in the bag.

Usage

From source file:edu.gslis.ts.RunQuery.java

/**
 * Command-line entry point: reads the stream ids listed in {@code ids.txt} under the
 * {@code input} directory, opens the matching {@code .xz} file for each id, tokenises the
 * clean-visible text of the document on whitespace and prints every distinct token together
 * with its number of occurrences as {@code term:count}.
 *
 * <p>Any exception is caught here and its stack trace is printed; nothing is rethrown.
 *
 * @param args command-line arguments; the options {@code input}, {@code events},
 *             {@code stop} and {@code query} are read
 */
public static void main(String[] args) {
    try {
        // Get the commandline options
        Options options = createOptions();
        CommandLineParser parser = new GnuParser();
        CommandLine cmd = parser.parse(options, args);

        String inputPath = cmd.getOptionValue("input");
        String eventsPath = cmd.getOptionValue("events");
        String stopPath = cmd.getOptionValue("stop");
        int queryId = Integer.parseInt(cmd.getOptionValue("query"));

        List<String> ids = FileUtils.readLines(new File(inputPath + File.separator + "ids.txt"));

        Stopper stopper = new Stopper(stopPath);
        Map<Integer, FeatureVector> queries = readEvents(eventsPath, stopper);

        // NOTE(review): the selected query is looked up but not used anywhere below.
        FeatureVector query = queries.get(queryId);

        Pairtree ptree = new Pairtree();
        Bag<String> words = new HashBag<String>();

        for (String streamId : ids) {

            String ppath = ptree.mapToPPath(streamId.replace("-", ""));

            String inpath = inputPath + File.separator + ppath + File.separator + streamId + ".xz";
            File infile = new File(inpath);

            // Opened separately so it can still be closed if the XZ wrapper fails to construct.
            FileInputStream fileIn = new FileInputStream(infile);
            try {
                InputStream in = new XZInputStream(fileIn);

                TTransport inTransport = new TIOStreamTransport(new BufferedInputStream(in));
                try {
                    TBinaryProtocol inProtocol = new TBinaryProtocol(inTransport);
                    inTransport.open();
                    final StreamItem item = new StreamItem();

                    // NOTE(review): every read goes into the same item, so only the state left
                    // in it after the loop is tokenised below - confirm each file holds
                    // exactly one stream item.
                    while (true) {
                        try {
                            item.read(inProtocol);
                        } catch (TTransportException tte) {
                            // END_OF_FILE is used to indicate EOF and is not an exception.
                            if (tte.getType() != TTransportException.END_OF_FILE) {
                                tte.printStackTrace();
                            }
                            break;
                        }
                    }

                    // Skip documents without text instead of aborting the whole run with a
                    // NullPointerException (presumably body/clean_visible are optional - confirm).
                    String docText = item.getBody() == null ? null : item.getBody().getClean_visible();
                    if (docText != null) {
                        StringTokenizer itr = new StringTokenizer(docText);
                        while (itr.hasMoreTokens()) {
                            words.add(itr.nextToken());
                        }
                    }
                } finally {
                    // Release the transport even when reading or tokenising fails.
                    inTransport.close();
                }
            } finally {
                fileIn.close();
            }

        }

        for (String term : words.uniqueSet()) {
            System.out.println(term + ":" + words.getCount(term));
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.frequency.phrasedetection.FrequencyCounter.java

/**
 * Write counter with counts from a bag to an output stream.
 * <p>
 * Only tokens whose count is at least {@code minCount} are written. They are sorted
 * alphabetically if {@code sortByAlphabet} is set, otherwise by descending count if
 * {@code sortByCount} is set, otherwise left in the iteration order of the bag's unique set.
 * Each token is written as one line: token, {@code COLUMN_SEPARATOR}, count, newline,
 * encoded as UTF-8.
 *
 * @param os      an {@link OutputStream}
 * @param counter a {@link Bag} of string counter
 * @throws RuntimeException wrapping the {@link IOException} if writing to {@code os} fails
 */
private void writeNgrams(OutputStream os, Bag<String> counter) {
    /* create token stream */
    Stream<String> stream = counter.uniqueSet().stream().filter(token -> counter.getCount(token) >= minCount);

    /* sort output */
    if (sortByAlphabet) {
        stream = stream.sorted(String::compareTo);
    } else if (sortByCount) {
        // arguments swapped to get descending order
        stream = stream.sorted((o1, o2) -> Integer.compare(counter.getCount(o2), counter.getCount(o1)));
    }

    /* write tokens with counts */
    stream.forEach(token -> {
        String line = token + COLUMN_SEPARATOR + counter.getCount(token) + "\n";
        try {
            // Explicit charset: the no-arg getBytes() depends on the platform default encoding.
            os.write(line.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
}

From source file:com.mikenimer.familydam.services.photos.MetadataKeywordsListServlet.java

/**
 * Writes a JSON array describing the keywords found on {@code fd:image} nodes below a path.
 * <p>
 * The path defaults to {@code /content/dam} and can be overridden with the {@code path}
 * request parameter. For every matching node that has a {@code metadata} child with a
 * {@code keywords} property, the property value is split on commas and each piece is counted.
 * Each distinct keyword is emitted as an object with the keys {@code word} (lower-cased),
 * {@code count} and {@code size} (a {@code rem} value clamped to the range 1.5 to 5).
 *
 * @param request  the incoming request; the optional {@code path} parameter is read
 * @param response the response whose writer receives the JSON
 * @throws SlingServletException if anything fails while querying or writing
 * @throws IOException           declared by the servlet contract
 */
@Override
protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse response)
        throws SlingServletException, IOException {
    String path = "/content/dam";

    if (request.getRequestParameter("path") != null) {
        path = request.getRequestParameter("path").getString();
    }

    try {
        Bag<String> bag = new HashBag<String>();

        // NOTE(review): 'path' comes straight from the request and is concatenated into the
        // JCR-SQL2 statement; a crafted value can alter the query. Validate or escape it in a
        // way supported by the repository implementation in use.
        String stmt = "select * from [fd:image] WHERE ISDESCENDANTNODE([" + path + "])";

        Session session = request.getResourceResolver().adaptTo(Session.class);
        Query query = session.getWorkspace().getQueryManager().createQuery(stmt, Query.JCR_SQL2);
        QueryResult results = query.execute();

        NodeIterator nodeIterator = results.getNodes();
        while (nodeIterator.hasNext()) {
            Node n = nodeIterator.nextNode();
            if (n.hasNode("metadata")) {
                Node metaNode = n.getNode("metadata");
                if (metaNode.hasProperty("keywords")) {
                    String keywords = metaNode.getProperty("keywords").getString();
                    // NOTE(review): keywords are counted as-is (case-sensitive, untrimmed) but
                    // emitted lower-cased below, so the output may contain duplicate words.
                    for (String key : keywords.split(",")) {
                        bag.add(key);
                    }
                }
            }
        }

        Set<String> set = bag.uniqueSet();

        // find scale ratio, we need a 1-12 range

        final JSONWriter w = new JSONWriter(response.getWriter());
        w.setTidy(true);
        w.array();
        for (String word : set) {
            int count = bag.getCount(word);
            w.object();
            w.key("word").value(word.toLowerCase());
            w.key("count").value(count);
            // 0.05rem per occurrence, clamped to [1.5rem, 5rem]
            w.key("size").value(Math.max(1.5, Math.min(5, count * .05)) + "rem");

            // todo try this.
            // $size = min(max(round(( $size_max*($count-$count_min))/($count_max-$count_min),2), $size_min),$size_max);
            w.endObject();
        }
        w.endArray();

    } catch (Exception re) {
        re.printStackTrace();
        throw new SlingServletException(new javax.servlet.ServletException(re));
    }
}

From source file:uniol.apt.analysis.cycles.lts.AllSmallCyclesHavePVOne.java

/**
 * Check if a cycle exists whose Parikh vector is incomparable to (1, ..., 1).
 * @param ts The transition system to examine
 * @return true if an incomparable cycle was found, else false.
 */
static private boolean checkPhase2(TransitionSystem ts) {
    /*
     * Proof that the Parikh vector of generalised cycles is a linear combination of the Parikh vectors of
     * chords:
     * (Preconditions: total reachability, but nothing else)
     *
     * Assume a fixed spanning tree that assigns to each state s a (directed) path from the initial state to
     * s and also the Parikh vector P(s) of this path. Define the Parikh vector of a chord to be
     * P(s[t>s')=P(s)+1_t-P(s').
     *
     * We will prove that the Parikh vector of a directed cycle is a linear combination of the Parikh
     * vectors of the chords. This is shown by proving the more general claim that this also holds for
     * generalised (undirected; allowing arcs to be followed backwards) cycles. Since every directed cycle
     * is also a generalised cycle, the claim follows.
     *
     * We prove this by induction on the number of chords contained in a cycle.
     *
     * If a cycle contains no chords, it can only follow arcs inside the spanning tree and so its Parikh
     * vector must be zero.
     *
     * Assume a cycle pi containing a chord so that we can write pi as pi = s [t> s' [sigma> s where sigma
     * is the remaining cycle. By total reachability, we can use the paths from the initial state to s and
     * s' to construct a new cycle that does not use this chord, but instead goes from s' backwards to the
     * initial state and from there goes forwards to s, both by following the arcs of the spanning tree.
     * This cycle is pi' = s' [sigma> s [-P_s> i [P_{s'}> s'. Note that this is a generalised cycle,
     * because the path leading to state s is followed backwards. Since this cycle contains one chord less,
     * by the induction hypothesis it can be written as a linear combination of chords.
     *
     * The Parikh vector of pi' satisfies: P(pi')=P(sigma)-P(s)+P(s'). By using P(s[t>s')=P(s)+1_t-P(s'),
     * this can be rewritten to P(pi')=P(sigma)-P(s[t>s')+1_t.
     *
     * We can now write the Parikh vector of pi as P(pi)=1_t+P(sigma)=P(pi')+P(s[t>s') which shows that
     * P(pi) is also a linear combination of the Parikh vectors of chords.
     */

    /*
     * Proof that for any chord s[t>s' we have P(s)+1_t >= P(s'):
     * (Preconditions: determinism, persistence, reversibility, total reachability)
     *
     * Assume P1 holds.
     *
     * In the paper "Characterisation of the State Spaces of Marked Graph Petri Nets" (which was already
     * mentioned above), short paths are defined. Because it does a breadth-first search, our spanning tree
     * computes short paths from the initial state to all other states. Thus, the path sigma(s') to s' with
     * Parikh vector P(s') is a short path. The path sigma(st) that first goes to s via P(s) and then to s'
     * via our chord is either also a short path, or is longer than the length of a short path.
     *
     * Lemma 25 in this paper says that a path is short iff there is some event x which does not appear in
     * it.
     *
     * Lemma 27 in this paper says that for any path from s to s', its Parikh vector is the Parikh vector of
     * a short path from s to s' plus a number m added to each component of the Parikh vector uniformly.
     *
     * Thus, since sigma(s) is a short path, sigma(st) can either be a short path (if one of its entries is
     * null) or contains a cycle (if none of its entries is null). In the first case we have P(s)+1_t=P(s')
     * and in the second case we have P(s)+1_t >= P(s').
     *
     * Thus, if we find a chord for which P(s)+1_t >= P(s') does not hold, then the transition system does
     * not satisfy P1, because we already checked for determinism and persistence.
     */

    /*
     * Proof that for any chord, all entries must be the same:
     * (Preconditions: determinism, persistence, reversibility, total reachability)
     *
     * Assume P1 holds.
     *
     * Lemma 27 in the paper says that for any path from s to s', its Parikh vector is the Parikh vector of
     * a short path from s to s' plus a number m added to each component of the Parikh vector uniformly.
     *
     * We already know that the path to s' via the spanning tree is a short path. Lemma 27 says that the
     * path going to s and then following s[t>s' must have this Parikh vector plus a constant number.
     * Thus, their difference must be the same number in each component.
     *
     * Again, because determinism and persistence were already checked, if this does not hold, then P1 must
     * be violated.
     */

    SpanningTree<TransitionSystem, Arc, State> spanningTree = SpanningTree.get(ts, ts.getInitialState());
    for (Arc chord : spanningTree.getChords()) {
        State from = chord.getSource();
        State to = chord.getTarget();

        // P(s) + 1_t on one side, P(s') on the other
        Bag<String> viaChord = getParikhVectorReaching(spanningTree, from);
        Bag<String> viaTree = getParikhVectorReaching(spanningTree, to);
        viaChord.add(chord.getLabel());

        // -1 means "no event looked at yet"; a real difference of -1 is rejected below anyway
        int reference = -1;
        for (String event : ts.getAlphabet()) {
            int difference = viaChord.getCount(event) - viaTree.getCount(event);

            if (reference == -1) {
                // First event: every later event has to show this same difference
                reference = difference;
            }

            if (difference >= 0 && difference == reference) {
                continue;
            }

            debugFormat(
                    "Chord %s shows that P1 is not satisfied, because %s+%s-%s has "
                            + "either a negative entry or not all entries have the same "
                            + " number of occurrences",
                    chord, spanningTree.getEdgePathFromStart(from), chord.getLabel(),
                    spanningTree.getEdgePathFromStart(to));
            return true;
        }
    }

    /*
     * If we get here, then we know:
     *
     * - The Parikh vector of any cycle is a linear combination of the Parikh vectors of chords
     * - All chords have constant (all entries the same) Parikh vectors
     * - All chords have Parikh vectors >= 0
     * => All cycles must have Parikh vectors of the form (m, ..., m)
     * => There can be no cycle with a Parikh vector incomparable to (1, ..., 1)
     */

    return false;
}

From source file:uniol.apt.analysis.synthesize.separation.KBoundedSeparation.java

/**
 * Builds a new bag from {@code input} by raising the count of states as required by the arcs
 * labelled with {@code event}.
 * <p>
 * For every state of the transition system, the arcs leaving it (if {@code forward}) or
 * entering it (otherwise) that carry {@code event} are inspected. For each such arc the value
 * {@code getGradient(input, arc) - g} is computed and negated when not going forward. The
 * largest positive value found (or zero) is added to the state's count from {@code input}.
 *
 * @param input   the bag to start from; it is only read
 * @param event   the event whose arcs are considered
 * @param g       the value subtracted from each arc's gradient
 * @param forward whether to look at postset (true) or preset (false) arcs
 * @return a freshly created bag with the resulting counts
 */
private Bag<State> expand(Bag<State> input, Event event, int g, boolean forward) {
    TransitionSystem ts = utility.getTransitionSystem();
    Bag<State> expanded = new HashBag<State>();

    for (State node : ts.getNodes()) {
        int extra = 0;
        for (Arc edge : forward ? node.getPostsetEdges() : node.getPresetEdges()) {
            if (!edge.getEvent().equals(event)) {
                continue;
            }
            int deviation = getGradient(input, edge) - g;
            int needed = forward ? deviation : -deviation;
            extra = Math.max(extra, needed);
        }
        expanded.add(node, input.getCount(node) + extra);
    }

    return expanded;
}

From source file:uniol.apt.analysis.synthesize.separation.KBoundedSeparation.java

/**
 * Computes how much the count in {@code r} changes along an arc.
 *
 * @param r   the bag holding a count per state
 * @param arc the arc to examine
 * @return the count of the arc's target minus the count of the arc's source
 */
private int getGradient(Bag<State> r, Arc arc) {
    int atTarget = r.getCount(arc.getTarget());
    int atSource = r.getCount(arc.getSource());
    return atTarget - atSource;
}

From source file:uniol.apt.analysis.synthesize.separation.KBoundedSeparation.java

/**
 * Decides whether the bag {@code r} is worth exploring further.
 *
 * @param r the bag holding a count per state
 * @param k the largest count a state may have
 * @return false if {@code r} contains all states of the transition system or if some state
 *         occurs more than {@code k} times, true otherwise
 */
private boolean shouldExplore(Bag<State> r, int k) {
    // When no state has cardinality zero, the result won't be a minimal region: give up
    boolean coversAllStates = r.containsAll(utility.getTransitionSystem().getNodes());
    if (coversAllStates) {
        return false;
    }

    // Give up as soon as one state exceeds the bound k
    for (State candidate : r.uniqueSet()) {
        if (r.getCount(candidate) > k) {
            return false;
        }
    }

    return true;
}

From source file:uniol.apt.analysis.synthesize.separation.KBoundedSeparation.java

/**
 * Turns a bag of states into a {@link Region}.
 * <p>
 * For every event of the transition system, the arcs labelled with that event are scanned to
 * find the smallest count of any source state, and the gradient of the last such arc is taken.
 * In the pure case the gradient alone decides the weights: a positive gradient becomes the
 * forward weight, anything else becomes the backward weight. Otherwise the backward weight is
 * the smallest source count and the forward weight is that value plus the gradient. The
 * initial marking is the count of the initial state in {@code r}.
 *
 * @param r the bag holding a count per state
 * @return the region built from {@code r}
 */
private Region convertToRegion(Bag<State> r) {
    TransitionSystem ts = utility.getTransitionSystem();
    Region.Builder regionBuilder = new Region.Builder(utility);

    for (Event event : ts.getAlphabetEvents()) {
        Arc lastMatch = null;
        int lowestSourceCount = Integer.MAX_VALUE;
        for (Arc candidate : ts.getEdges()) {
            if (!candidate.getEvent().equals(event)) {
                continue;
            }
            lastMatch = candidate;
            int sourceCount = r.getCount(candidate.getSource());
            if (sourceCount < lowestSourceCount) {
                lowestSourceCount = sourceCount;
            }
        }

        // TS should not have an event which is not the label of any arc
        assert lastMatch != null;

        int gradient = getGradient(r, lastMatch);
        int backward;
        int forward;
        if (pure) {
            // Only one direction gets a non-zero weight
            forward = gradient > 0 ? gradient : 0;
            backward = gradient > 0 ? 0 : -gradient;
        } else {
            backward = lowestSourceCount;
            forward = lowestSourceCount + gradient;
        }

        regionBuilder.addWeightOn(event.getLabel(), BigInteger.valueOf(-backward));
        regionBuilder.addWeightOn(event.getLabel(), BigInteger.valueOf(forward));
    }

    State initialState = utility.getTransitionSystem().getInitialState();
    int initial = r.getCount(initialState);
    Region region = regionBuilder.withInitialMarking(BigInteger.valueOf(initial));
    debugFormat("Region %s corresponds to %s", region, r);
    return region;
}