Example usage for org.apache.mahout.clustering Cluster getRadius

Introduction

On this page you can find example usages of org.apache.mahout.clustering.Cluster.getRadius().

Prototype

Vector getRadius();

Source Link

Document

Get the "radius" of the Cluster as a Vector.

Usage

From source file:DisplayClustering.java

License:Apache License

/**
 * Draws an ellipse for every cluster held in {@code CLUSTERS}.
 *
 * <p>A countdown index starts at {@code CLUSTERS.size() - 1} and drops by one per
 * list of clusters. The list drawn when the countdown reaches zero (the last one)
 * uses a stroke width of 3, all others a width of 1. The colour is taken from
 * {@code COLORS} at the countdown index, capped at the last entry of the array.
 * Each ellipse is plotted at the cluster centre with the radius vector multiplied by 3.
 *
 * @param g2 the graphics context to draw on
 */
protected static void plotClusters(Graphics2D g2) {
    int countdown = CLUSTERS.size() - 1;
    for (List<Cluster> generation : CLUSTERS) {
        int strokeWidth = (countdown == 0) ? 3 : 1;
        int colorIndex = Math.min(COLORS.length - 1, countdown);
        countdown--;

        g2.setStroke(new BasicStroke(strokeWidth));
        g2.setColor(COLORS[colorIndex]);
        for (Cluster current : generation) {
            plotEllipse(g2, current.getCenter(), current.getRadius().times(3));
        }
    }
}

From source file:DisplayClustering.java

License:Apache License

/**
 * Loads the clusters stored as {@code ClusterWritable} values under the given path.
 *
 * <p>The values are iterated with a {@code SequenceFileDirValueIterable} configured with
 * {@code PathType.LIST} and {@code PathFilters.logsCRCFilter()}. Every cluster is logged
 * (id, formatted centre, number of observations, formatted radius) and then collected.
 *
 * @param clustersIn the path to read the cluster values from
 * @return the clusters in the order they were read
 */
protected static List<Cluster> readClustersWritable(Path clustersIn) {
    List<Cluster> loaded = Lists.newArrayList();
    Configuration conf = new Configuration();
    Iterable<ClusterWritable> stored = new SequenceFileDirValueIterable<ClusterWritable>(clustersIn,
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (ClusterWritable writable : stored) {
        Cluster current = writable.getValue();
        String centerText = AbstractCluster.formatVector(current.getCenter(), null);
        String radiusText = AbstractCluster.formatVector(current.getRadius(), null);
        log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", current.getId(), centerText,
                current.getNumObservations(), radiusText);
        loaded.add(current);
    }
    return loaded;
}

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

/**
 * Reports on the most recent successful clustering job, or starts a new one.
 *
 * <p>If a job is pending ({@code theFuture != null}) it is polled for at most one
 * millisecond; when it has completed it becomes {@code lastSuccessful}.
 *
 * <p>When a successful job is available, the following request parameters are honoured:
 * <ul>
 *   <li>{@code LIST_CLUSTERS} - adds an "all" entry with, per cluster id, "numPoints",
 *       "center" and (when non-null) "radius"; defaults to {@code false} when absent.</li>
 *   <li>{@code INCLUDE_POINTS} - additionally adds a "points" entry to each listed cluster.</li>
 *   <li>{@code LIST_POINTS} - adds the points of the given cluster id, keyed by that id.</li>
 *   <li>{@code IN_CLUSTER} - only triggers loading of the points; no output is produced yet.</li>
 * </ul>
 * Points and clusters that are not cached on the job are loaded on demand.
 *
 * <p>When no successful job exists and {@code BUILD} is true, a clustering run is started
 * with {@code K} clusters (default 10).
 *
 * @param params the request parameters
 * @return the (possibly empty) result list
 * @throws SolrException with {@code SERVER_ERROR} if the points or clusters cannot be loaded
 */
@Override
public NamedList cluster(SolrParams params) {
    NamedList result = new NamedList();
    //check to see if we have new results
    try {
        if (theFuture != null) {
            //see if we have new results, but don't wait too long for them
            ClusterJob job = theFuture.get(1, TimeUnit.MILLISECONDS);
            //TODO: clean up the old dirs before switching lastSuccessful
            lastSuccessful = job;
            theFuture = null;
        }
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller instead of silently consuming it.
        Thread.currentThread().interrupt();
        log.error("Exception", e);
    } catch (ExecutionException e) {
        log.error("Exception", e);
    } catch (TimeoutException e) {
        log.error("Exception", e);
    }
    if (lastSuccessful != null) {//we have clusters
        //do we need the points?
        boolean includePoints = params.getBool(INCLUDE_POINTS, false);
        int clusterId = params.getInt(LIST_POINTS, Integer.MIN_VALUE);
        Map<Integer, List<String>> toPoints = lastSuccessful.clusterIdToPoints;
        String docId = params.get(IN_CLUSTER);
        if ((includePoints || clusterId != Integer.MIN_VALUE || docId != null) && toPoints == null) {
            //load the points
            try {
                toPoints = readPoints(new Path(lastSuccessful.jobDir + File.separator + "points"),
                        lastSuccessful.conf);
            } catch (IOException e) {
                // Pass the cause along so the root failure is not lost.
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Unable to load points: " + lastSuccessful, e);
            }
        }
        // Use the defaulted overload: the single-argument getBool returns a nullable
        // Boolean and would fail on unboxing when the parameter is not supplied.
        if (params.getBool(LIST_CLUSTERS, false)) {
            NamedList nl = new NamedList();
            result.add("all", nl);

            Map<Integer, Cluster> clusterMap = lastSuccessful.clusters;
            if (clusterMap == null) {
                //we aren't caching, so load 'em up
                try {
                    clusterMap = loadClusters(lastSuccessful);
                } catch (Exception e) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                            "unable to load the clusters from " + lastSuccessful, e);
                }
            }

            for (Cluster cluster : clusterMap.values()) {
                NamedList clusterNL = new NamedList();
                nl.add(String.valueOf(cluster.getId()), clusterNL);
                clusterNL.add("numPoints", cluster.getNumPoints());
                //TODO: better format?
                clusterNL.add("center", cluster.getCenter().asFormatString());
                if (cluster.getRadius() != null) {
                    clusterNL.add("radius", cluster.getRadius().asFormatString());
                }
                if (includePoints) {
                    List<String> points = toPoints == null ? null : toPoints.get(cluster.getId());
                    clusterNL.add("points", points);
                }
            }
        }

        //TODO: support looking up the cluster of docId, and sending in multiple ids

        if (clusterId != Integer.MIN_VALUE && toPoints != null) {
            // Read from toPoints, not lastSuccessful.clusterIdToPoints: the latter is
            // null whenever the points are not cached and were loaded on demand above.
            List<String> points = toPoints.get(clusterId);
            if (points != null) {
                result.add(String.valueOf(clusterId), points);
            }
        }
    } else if (params.getBool(BUILD, false)) {
        RefCounted<SolrIndexSearcher> refCnt = core.getSearcher();
        try {
            int theK = params.getInt(K, 10);
            cluster(refCnt.get(), theK);
        } finally {
            // Always release the searcher reference, even if starting the job fails.
            refCnt.decref();
        }
    }
    return result;
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

/**
 * Reads the sequence file output of a custom map/reduce job and converts each record
 * into a BSON object with a "key" and a "value" entry.
 *
 * <p>Keys of type {@code Text}, {@code DoubleWritable}, {@code IntWritable} and
 * {@code LongWritable} are stored as strings, {@code BSONWritable} keys are converted via
 * {@code MongoDbUtil.convert}; keys of any other type produce no "key" entry. Values support
 * the same types plus Mahout {@code VectorWritable}, {@code WeightedVectorWritable} (which
 * also adds "valueWeight") and {@code ClusterWritable} (stored as an object with "center"
 * and "radius"). Any other value type is reported as "unknownValue" with its class name.
 *
 * @param cmr    the job whose output directory is read
 * @param nLimit maximum number of records to return; values {@code <= 0} mean no limit
 * @param fields comma-separated list of fields to remove from every element, or
 *               {@code null} to keep everything. Only the part of each entry before the
 *               first ':' is used. A name starting with "value." removes that field from
 *               the nested "value" object (when the value is an object); any other name
 *               removes the top-level field.
 * @return the converted records, in iteration order
 * @throws SAXException                 declared for callers; raised by the helpers used here
 * @throws IOException                  declared for callers; raised by the helpers used here
 * @throws ParserConfigurationException declared for callers; raised by the helpers used here
 */
public static BasicDBList getBsonFromSequenceFile(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws SAXException, IOException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SequenceFileDirIterable<? extends Writable, ? extends Writable> seqFileDir = new SequenceFileDirIterable(
            pathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);

    // Very basic, only allow top level, 1 level of nesting, and field removal
    HashSet<String> fieldLookup = null;
    if (null != fields) {
        fieldLookup = new HashSet<String>();
        for (String field : fields.split(",")) {
            String[] fieldDecomp = field.split(":");
            // split() yields an empty array for an entry such as ":", so guard the index.
            if (fieldDecomp.length > 0) {
                fieldLookup.add(fieldDecomp[0]);
            }
        }
    } //TOTEST

    int nRecords = 0;
    for (Pair<? extends Writable, ? extends Writable> record : seqFileDir) {
        BasicDBObject element = new BasicDBObject();

        // KEY

        Writable key = record.getFirst();
        if (key instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) key;
            element.put("key", writable.toString());
        } else if (key instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) key;
            element.put("key", Double.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) key;
            element.put("key", Integer.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) key;
            element.put("key", Long.toString(writable.get()));
        } else if (key instanceof BSONWritable) {
            element.put("key", MongoDbUtil.convert((BSONWritable) key));
        }

        // VALUE

        Writable value = record.getSecond();
        if (value instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) value;
            element.put("value", writable.toString());
        } else if (value instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) value;
            element.put("value", Double.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) value;
            element.put("value", Integer.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) value;
            element.put("value", Long.toString(writable.get()));
        } else if (value instanceof BSONWritable) {
            element.put("value", MongoDbUtil.convert((BSONWritable) value));
        } else if (value instanceof org.apache.mahout.math.VectorWritable) {
            Vector vec = ((org.apache.mahout.math.VectorWritable) value).get();
            BasicDBList dbl2 = listFromMahoutVector(vec, "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.classify.WeightedVectorWritable) {
            org.apache.mahout.clustering.classify.WeightedVectorWritable vecW = (org.apache.mahout.clustering.classify.WeightedVectorWritable) value;
            element.put("valueWeight", vecW.getWeight());
            BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
            Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable) value).getValue();
            BasicDBObject clusterVal = new BasicDBObject();
            clusterVal.put("center", listFromMahoutVector(cluster.getCenter(), "center", clusterVal));
            clusterVal.put("radius", listFromMahoutVector(cluster.getRadius(), "radius", clusterVal));
            element.put("value", clusterVal);
        } else {
            element.put("unknownValue", value.getClass().toString());
        }

        // Check the fields settings:
        // Only handle a few...
        if (null != fieldLookup) {
            for (String fieldToRemove : fieldLookup) {
                if (fieldToRemove.startsWith("value.")) {
                    // The nested object is stored under "value" (no trailing dot). It may
                    // also be a String or a list, in which case there is nothing to strip.
                    Object nested = element.get("value");
                    if (nested instanceof BasicDBObject) {
                        ((BasicDBObject) nested).remove(fieldToRemove.substring(6));
                    }
                } else {
                    element.remove(fieldToRemove);
                }
            } //TOTEST
        }

        dbl.add(element);
        nRecords++;
        if ((nLimit > 0) && (nRecords >= nLimit)) {
            break;
        }
    }

    return dbl;
}

From source file:com.modofo.molo.cluster.DisplayClustering.java

License:Apache License

/**
 * Loads the clusters stored as {@code ClusterWritable} values under the given path.
 *
 * <p>Each cluster is unwrapped, logged (id, formatted centre, number of observations,
 * formatted radius) and appended to the returned list.
 *
 * @param clustersIn the path to read the cluster values from
 * @return the clusters in the order they were read
 */
protected static List<Cluster> readClustersWritable(Path clustersIn) {
    List<Cluster> loaded = Lists.newArrayList();
    Configuration conf = new Configuration();
    Iterable<ClusterWritable> stored = new SequenceFileDirValueIterable<ClusterWritable>(clustersIn,
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (ClusterWritable writable : stored) {
        Cluster current = writable.getValue();
        Object[] logArgs = {
                current.getId(),
                AbstractCluster.formatVector(current.getCenter(), null),
                current.getNumObservations(),
                AbstractCluster.formatVector(current.getRadius(), null) };
        log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", logArgs);
        loaded.add(current);
    }
    return loaded;
}

From source file:io.github.thushear.display.DisplayClustering.java

License:Apache License

/**
 * Draws an ellipse for every cluster held in {@code CLUSTERS}.
 *
 * <p>A countdown index starts at {@code CLUSTERS.size() - 1} and drops by one per list
 * of clusters. The list drawn when the countdown is zero uses stroke width 3, all others
 * width 1. The colour comes from {@code COLORS} at the countdown index, capped at the
 * last entry. Ellipses are centred on the cluster centre, with the radius vector
 * multiplied by 3.
 *
 * @param g2 the graphics context to draw on
 */
protected static void plotClusters(Graphics2D g2) {
    int countdown = CLUSTERS.size() - 1;
    for (List<Cluster> generation : CLUSTERS) {
        int strokeWidth = (countdown == 0) ? 3 : 1;
        int colorIndex = Math.min(DisplayClustering.COLORS.length - 1, countdown);
        countdown--;

        g2.setStroke(new BasicStroke(strokeWidth));
        g2.setColor(COLORS[colorIndex]);
        for (Cluster current : generation) {
            plotEllipse(g2, current.getCenter(), current.getRadius().times(3));
        }
    }
}

From source file:io.github.thushear.display.DisplayClustering.java

License:Apache License

/**
 * Loads the {@code Cluster} values stored under the given path.
 *
 * <p>Each cluster is logged (id, formatted centre, number of points, formatted radius)
 * and appended to the returned list.
 *
 * @param clustersIn the path to read the cluster values from
 * @return the clusters in the order they were read
 */
protected static List<Cluster> readClusters(Path clustersIn) {
    List<Cluster> loaded = new ArrayList<Cluster>();
    Configuration conf = new Configuration();
    Iterable<Cluster> stored = new SequenceFileDirValueIterable<Cluster>(clustersIn, PathType.LIST,
            PathFilters.logsCRCFilter(), conf);
    for (Cluster current : stored) {
        Object[] logArgs = {
                current.getId(),
                AbstractCluster.formatVector(current.getCenter(), null),
                current.getNumPoints(),
                AbstractCluster.formatVector(current.getRadius(), null) };
        log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", logArgs);
        loaded.add(current);
    }
    return loaded;
}

From source file:org.conan.mymahout.clustering.display.DisplayCanopy.java

License:Apache License

/**
 * Draws every significant cluster held in {@code CLUSTERS}.
 *
 * <p>For each cluster accepted by {@code isSignificant}, two blue ellipses with stroke
 * width 1 are drawn around the centre using {@code T1} and {@code T2} for both
 * components. A third ellipse then uses the radius vector multiplied by 3. Its colour is
 * taken from {@code COLORS} at a countdown index (capped at the last entry) that starts
 * at {@code CLUSTERS.size() - 1} and drops by one per list of clusters; its stroke width
 * is 3 when the countdown is zero and 1 otherwise.
 *
 * @param g2 the graphics context to draw on
 */
protected static void plotClusters(Graphics2D g2) {
    int countdown = CLUSTERS.size() - 1;
    for (List<Cluster> generation : CLUSTERS) {
        for (Cluster current : generation) {
            if (!isSignificant(current)) {
                continue;
            }
            // Threshold ellipses.
            g2.setStroke(new BasicStroke(1));
            g2.setColor(Color.BLUE);
            plotEllipse(g2, current.getCenter(), new DenseVector(new double[] { T1, T1 }));
            plotEllipse(g2, current.getCenter(), new DenseVector(new double[] { T2, T2 }));

            // Ellipse derived from the cluster's own radius.
            int colorIndex = Math.min(DisplayClustering.COLORS.length - 1, countdown);
            int strokeWidth = (countdown == 0) ? 3 : 1;
            g2.setColor(COLORS[colorIndex]);
            g2.setStroke(new BasicStroke(strokeWidth));
            plotEllipse(g2, current.getCenter(), current.getRadius().times(3));
        }
        countdown--;
    }
}

From source file:sample.DisplayClustering.java

License:Apache License

/**
 * Loads the clusters stored as {@code ClusterWritable} values under the given path.
 *
 * <p>For each cluster a line of the form
 * {@code "Cluster: <id> <center> <numObservations> <radius>"} is printed to standard
 * output before the cluster is appended to the returned list.
 *
 * @param clustersIn the path to read the cluster values from
 * @return the clusters in the order they were read
 */
protected static List<Cluster> readClustersWritable(Path clustersIn) {
    List<Cluster> loaded = new ArrayList<>();
    Configuration conf = new Configuration();
    Iterable<ClusterWritable> stored = new SequenceFileDirValueIterable<ClusterWritable>(clustersIn,
            PathType.LIST, PathFilters.logsCRCFilter(), conf);
    for (ClusterWritable writable : stored) {
        Cluster current = writable.getValue();
        StringBuilder line = new StringBuilder("Cluster: ");
        line.append(current.getId()).append(" ");
        line.append(AbstractCluster.formatVector(current.getCenter(), null)).append(" ");
        line.append(current.getNumObservations()).append(" ");
        line.append(AbstractCluster.formatVector(current.getRadius(), null));
        System.out.println(line.toString());
        loaded.add(current);
    }
    return loaded;
}