List of usage examples for the org.apache.mahout.clustering.Cluster#getId() method
/**
 * Returns the integer id of this cluster.
 *
 * <p>The usage examples on this page print it when logging a cluster and use it as a map key and
 * as the name of a per-cluster result entry.
 *
 * @return the cluster id
 */
int getId();
From source file:DisplayClustering.java
License:Apache License
protected static List<Cluster> readClustersWritable(Path clustersIn) { List<Cluster> clusters = Lists.newArrayList(); Configuration conf = new Configuration(); for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) { Cluster cluster = value.getValue(); log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumObservations(), AbstractCluster.formatVector(cluster.getRadius(), null)); clusters.add(cluster);//from w ww . j a v a2s .com } return clusters; }
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
@Override public NamedList cluster(SolrParams params) { NamedList result = new NamedList(); //check to see if we have new results try {/* w w w . j a va2s. c om*/ if (theFuture != null) { //see if we have new results, but don't wait too long for them ClusterJob job = theFuture.get(1, TimeUnit.MILLISECONDS); if (lastSuccessful != null) { //clean up the old ones //TODO: clean up the old dirs before switching lastSuccessful } lastSuccessful = job; theFuture = null; } else { } } catch (InterruptedException e) { log.error("Exception", e); } catch (ExecutionException e) { log.error("Exception", e); } catch (TimeoutException e) { log.error("Exception", e); } if (lastSuccessful != null) {//we have clusters //do we need the points? boolean includePoints = params.getBool(INCLUDE_POINTS, false); int clusterId = params.getInt(LIST_POINTS, Integer.MIN_VALUE); Map<Integer, List<String>> toPoints = lastSuccessful.clusterIdToPoints; String docId = params.get(IN_CLUSTER); if ((includePoints || clusterId != Integer.MIN_VALUE || docId != null) && toPoints == null) { //load the points try { toPoints = readPoints(new Path(lastSuccessful.jobDir + File.separator + "points"), lastSuccessful.conf); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to load points: " + lastSuccessful); } } if (params.getBool(LIST_CLUSTERS)) { NamedList nl = new NamedList(); result.add("all", nl); Map<Integer, Cluster> clusterMap = lastSuccessful.clusters; if (clusterMap == null) { //we aren't caching, so load 'em up try { clusterMap = loadClusters(lastSuccessful); } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "unable to load the clusters from " + lastSuccessful); } } for (Cluster cluster : clusterMap.values()) { NamedList clusterNL = new NamedList(); nl.add(String.valueOf(cluster.getId()), clusterNL); clusterNL.add("numPoints", cluster.getNumPoints()); //TODO: better format? 
clusterNL.add("center", cluster.getCenter().asFormatString()); if (cluster.getRadius() != null) { clusterNL.add("radius", cluster.getRadius().asFormatString()); } if (includePoints) { List<String> points = toPoints.get(cluster.getId()); clusterNL.add("points", points); } } } if (docId != null) { } //TODO: support sending in multiple ids if (clusterId != Integer.MIN_VALUE) { List<String> points = lastSuccessful.clusterIdToPoints.get(clusterId); if (points != null) { result.add(String.valueOf(clusterId), points); } } } else if (params.getBool(BUILD, false)) { RefCounted<SolrIndexSearcher> refCnt = core.getSearcher(); int theK = params.getInt(K, 10); cluster(refCnt.get(), theK); refCnt.decref(); } return result; }
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static Map<Integer, Cluster> loadClusters(ClusterJob job) throws Exception { Map<Integer, Cluster> result = new HashMap<Integer, Cluster>(); try {/* w ww .j ava 2 s.c om*/ FileSystem fs = job.output.getFileSystem(job.conf); for (FileStatus seqFile : fs.globStatus(new Path(job.output, "part-*"))) { Path path = seqFile.getPath(); //System.out.println("Input Path: " + path); doesn't this interfere with output? SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job.conf); try { Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance(); while (reader.next(key, value)) { Cluster cluster = (Cluster) value; result.put(cluster.getId(), cluster); } } finally { reader.close(); } } } finally { } return result; }
From source file:com.modofo.molo.cluster.DisplayClustering.java
License:Apache License
protected static List<Cluster> readClustersWritable(Path clustersIn) { List<Cluster> clusters = Lists.newArrayList(); Configuration conf = new Configuration(); for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) { Cluster cluster = value.getValue(); log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", new Object[] { cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumObservations(), AbstractCluster.formatVector(cluster.getRadius(), null) }); clusters.add(cluster);//w w w. java2 s . co m } return clusters; }
From source file:io.github.thushear.display.DisplayClustering.java
License:Apache License
/**
 * Reads the {@link Cluster} values found under {@code clustersIn} and returns them, logging each
 * cluster's id, center, point count and radius as it is read.
 *
 * @param clustersIn path handed to {@link SequenceFileDirValueIterable} with {@code PathType.LIST}
 * @return the clusters in the order they were read
 */
protected static List<Cluster> readClusters(Path clustersIn) {
  List<Cluster> collected = new ArrayList<Cluster>();
  Configuration hadoopConf = new Configuration();
  SequenceFileDirValueIterable<Cluster> stored =
      new SequenceFileDirValueIterable<Cluster>(
          clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), hadoopConf);

  for (Cluster each : stored) {
    // Arguments are listed in the order of the message placeholders.
    Object[] fields = {
      each.getId(),
      AbstractCluster.formatVector(each.getCenter(), null),
      each.getNumPoints(),
      AbstractCluster.formatVector(each.getRadius(), null)
    };
    log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", fields);
    collected.add(each);
  }
  return collected;
}
From source file:sample.DisplayClustering.java
License:Apache License
/**
 * Reads the clusters wrapped by the {@link ClusterWritable} values found under
 * {@code clustersIn}, printing one summary line per cluster to standard output.
 *
 * <p>Each line has the form {@code Cluster: <id> <center> <numObservations> <radius>}.
 *
 * @param clustersIn path handed to {@link SequenceFileDirValueIterable} with {@code PathType.LIST}
 * @return the clusters in the order they were read
 */
protected static List<Cluster> readClustersWritable(Path clustersIn) {
  List<Cluster> gathered = new ArrayList<>();
  Configuration hadoopConf = new Configuration();
  SequenceFileDirValueIterable<ClusterWritable> entries =
      new SequenceFileDirValueIterable<ClusterWritable>(
          clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), hadoopConf);

  for (ClusterWritable entry : entries) {
    Cluster current = entry.getValue();

    // Space-separated fields, appended in the same order they are evaluated.
    StringBuilder line = new StringBuilder("Cluster: ");
    line.append(current.getId());
    line.append(" ");
    line.append(AbstractCluster.formatVector(current.getCenter(), null));
    line.append(" ");
    line.append(current.getNumObservations());
    line.append(" ");
    line.append(AbstractCluster.formatVector(current.getRadius(), null));
    System.out.println(line.toString());

    gathered.add(current);
  }
  return gathered;
}