Usage examples for the uniqueSet() method of org.apache.commons.collections4.Bag
/**
 * Returns the distinct elements of this bag as a {@link Set}.
 * <p>
 * Because the result is a {@code Set}, each element appears in it once no matter how many
 * copies the bag holds. The examples below iterate this set and look up each element's
 * number of occurrences with {@code getCount}.
 *
 * @return the set of distinct elements in this bag
 */
Set<E> uniqueSet();
From source file:edu.gslis.ts.RunQuery.java
public static void main(String[] args) { try {//from w ww .ja v a 2 s. co m // Get the commandline options Options options = createOptions(); CommandLineParser parser = new GnuParser(); CommandLine cmd = parser.parse(options, args); String inputPath = cmd.getOptionValue("input"); String eventsPath = cmd.getOptionValue("events"); String stopPath = cmd.getOptionValue("stop"); int queryId = Integer.valueOf(cmd.getOptionValue("query")); List<String> ids = FileUtils.readLines(new File(inputPath + File.separator + "ids.txt")); Stopper stopper = new Stopper(stopPath); Map<Integer, FeatureVector> queries = readEvents(eventsPath, stopper); FeatureVector query = queries.get(queryId); Pairtree ptree = new Pairtree(); Bag<String> words = new HashBag<String>(); for (String streamId : ids) { String ppath = ptree.mapToPPath(streamId.replace("-", "")); String inpath = inputPath + File.separator + ppath + File.separator + streamId + ".xz"; // System.out.println(inpath); File infile = new File(inpath); InputStream in = new XZInputStream(new FileInputStream(infile)); TTransport inTransport = new TIOStreamTransport(new BufferedInputStream(in)); TBinaryProtocol inProtocol = new TBinaryProtocol(inTransport); inTransport.open(); final StreamItem item = new StreamItem(); while (true) { try { item.read(inProtocol); // System.out.println("Read " + item.stream_id); } catch (TTransportException tte) { // END_OF_FILE is used to indicate EOF and is not an exception. if (tte.getType() != TTransportException.END_OF_FILE) tte.printStackTrace(); break; } } // Do something with this document... String docText = item.getBody().getClean_visible(); StringTokenizer itr = new StringTokenizer(docText); while (itr.hasMoreTokens()) { words.add(itr.nextToken()); } inTransport.close(); } for (String term : words.uniqueSet()) { System.out.println(term + ":" + words.getCount(term)); } } catch (Exception e) { e.printStackTrace(); } }
From source file:de.tudarmstadt.ukp.dkpro.core.frequency.phrasedetection.FrequencyCounter.java
/**
 * Writes the sufficiently frequent entries of a bag, each with its count, to an output stream.
 * <p>
 * Entries whose count is below {@code minCount} are skipped. The remaining entries are ordered
 * alphabetically when {@code sortByAlphabet} is set, otherwise by descending count when
 * {@code sortByCount} is set, and are otherwise left in the order the bag's unique set yields
 * them. Each entry becomes one line: the token, {@code COLUMN_SEPARATOR}, the count, and a
 * newline.
 *
 * @param os      the {@link OutputStream} to write to
 * @param counter the {@link Bag} holding the tokens and their counts
 * @throws RuntimeException wrapping the {@link IOException} if writing a line fails
 */
private void writeNgrams(OutputStream os, Bag<String> counter) {
    /* keep only the tokens that occur at least minCount times */
    Stream<String> frequentTokens = counter.uniqueSet().stream()
            .filter(candidate -> counter.getCount(candidate) >= minCount);

    /* choose the output order; alphabetical ordering takes precedence over ordering by count */
    Stream<String> ordered;
    if (sortByAlphabet) {
        ordered = frequentTokens.sorted();
    } else if (sortByCount) {
        /* operands are swapped so that higher counts come first */
        ordered = frequentTokens.sorted(
                (left, right) -> Integer.compare(counter.getCount(right), counter.getCount(left)));
    } else {
        ordered = frequentTokens;
    }

    /* emit one "token<separator>count" line per entry */
    ordered.forEach(entry -> {
        String line = entry + COLUMN_SEPARATOR + counter.getCount(entry) + "\n";
        try {
            // NOTE(review): getBytes() without an argument encodes with the platform default
            // charset - confirm whether readers of this output expect a fixed encoding.
            os.write(line.getBytes());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
}
From source file:com.mikenimer.familydam.services.photos.MetadataKeywordsListServlet.java
@Override protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse response) throws SlingServletException, IOException { String path = "/content/dam"; if (request.getRequestParameter("path") != null) { path = request.getRequestParameter("path").getString(); }//w w w . j a v a 2s . c om try { Bag bag = new HashBag(); String stmt = "select * from [fd:image] WHERE ISDESCENDANTNODE([" + path + "])"; Session session = request.getResourceResolver().adaptTo(Session.class); Query query = session.getWorkspace().getQueryManager().createQuery(stmt, Query.JCR_SQL2); //query.setLimit(limit); //query.setOffset(offset); QueryResult results = query.execute(); // Execute the query and get the results ... // (This is the same as before.) //javax.jcr.QueryResult result = query.execute(); NodeIterator nodeIterator = results.getNodes(); while (nodeIterator.hasNext()) { Node n = nodeIterator.nextNode(); if (n.hasNode("metadata")) { Node metaNode = n.getNode("metadata"); if (metaNode.hasProperty("keywords")) { String keywords = metaNode.getProperty("keywords").getString(); String[] keys = keywords.split(","); for (String key : keys) { bag.add(key); } } } } Set set = bag.uniqueSet(); // find scale ratio, we need a 1-12 range final JSONWriter w = new JSONWriter(response.getWriter()); w.setTidy(true); w.array(); for (Object word : set) { w.object(); w.key("word").value(word.toString().toLowerCase()); w.key("count").value(bag.getCount(word)); w.key("size").value(Math.max(1.5, Math.min(5, bag.getCount(word) * .05)) + "rem"); // todo try this. // $size = min(max(round(( $size_max*($count-$count_min))/($count_max-$count_min),2), $size_min),$size_max); w.endObject(); } w.endArray(); //writeJsonObject(w, tree); } catch (Exception re) { re.printStackTrace(); throw new SlingServletException(new javax.servlet.ServletException(re)); } }
From source file:uniol.apt.analysis.synthesize.separation.KBoundedSeparation.java
private boolean shouldExplore(Bag<State> r, int k) { // Don't continue if no state has cardinality zero, because the result won't be a minimal region if (r.containsAll(utility.getTransitionSystem().getNodes())) return false; // Don't continue if some state has cardinality higher than k for (State state : r.uniqueSet()) if (r.getCount(state) > k) return false; return true;// w ww.j a v a 2 s. co m }