Example usage for org.apache.mahout.clustering Cluster getCenter

List of usage examples for org.apache.mahout.clustering Cluster getCenter

Introduction

On this page you can find example usage for org.apache.mahout.clustering Cluster getCenter.

Prototype

Vector getCenter();

Source Link

Document

Get the "center" of the Cluster as a Vector.
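
Before the usage examples, here is a minimal, self-contained sketch of calling getCenter(): it reads a directory of final clusters and prints each center, following the same SequenceFileDirValueIterable pattern used in the examples below. The class name PrintClusterCenters and the path "output/clusters-10-final" are hypothetical placeholders, not part of the Mahout API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.math.Vector;

public class PrintClusterCenters {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical location of a k-means "final" clusters directory
        Path clustersIn = new Path("output/clusters-10-final");
        for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn,
                PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            Cluster cluster = value.getValue();
            // getCenter() returns the cluster's "center" as a Vector
            Vector center = cluster.getCenter();
            System.out.println("Cluster " + cluster.getId() + " center: "
                    + AbstractCluster.formatVector(center, null));
        }
    }
}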

Usage

From source file:DisplayClustering.java

License:Apache License

protected static void plotClusters(Graphics2D g2) {
    int cx = CLUSTERS.size() - 1;
    for (List<Cluster> clusters : CLUSTERS) {
        g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
        g2.setColor(COLORS[Math.min(COLORS.length - 1, cx--)]);
        for (Cluster cluster : clusters) {
            plotEllipse(g2, cluster.getCenter(), cluster.getRadius().times(3));
        }
    }
}

From source file:DisplayClustering.java

License:Apache License

protected static List<Cluster> readClustersWritable(Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
            PathFilters.logsCRCFilter(), conf)) {
        Cluster cluster = value.getValue();
        log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}", cluster.getId(),
                AbstractCluster.formatVector(cluster.getCenter(), null), cluster.getNumObservations(),
                AbstractCluster.formatVector(cluster.getRadius(), null));
        clusters.add(cluster);
    }
    return clusters;
}

From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java

License:Apache License

@Override
public NamedList cluster(SolrParams params) {
    NamedList result = new NamedList();
    //check to see if we have new results
    try {
        if (theFuture != null) {
            //see if we have new results, but don't wait too long for them
            ClusterJob job = theFuture.get(1, TimeUnit.MILLISECONDS);
            if (lastSuccessful != null) {
                //clean up the old ones
                //TODO: clean up the old dirs before switching lastSuccessful
            }
            lastSuccessful = job;
            theFuture = null;
        } else {

        }

    } catch (InterruptedException e) {
        log.error("Exception", e);
    } catch (ExecutionException e) {
        log.error("Exception", e);
    } catch (TimeoutException e) {
        log.error("Exception", e);
    }
    if (lastSuccessful != null) {//we have clusters
        //do we need the points?
        boolean includePoints = params.getBool(INCLUDE_POINTS, false);
        int clusterId = params.getInt(LIST_POINTS, Integer.MIN_VALUE);
        Map<Integer, List<String>> toPoints = lastSuccessful.clusterIdToPoints;
        String docId = params.get(IN_CLUSTER);
        if ((includePoints || clusterId != Integer.MIN_VALUE || docId != null) && toPoints == null) {
            //load the points
            try {
                toPoints = readPoints(new Path(lastSuccessful.jobDir + File.separator + "points"),
                        lastSuccessful.conf);
            } catch (IOException e) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Unable to load points: " + lastSuccessful);
            }
        }
        if (params.getBool(LIST_CLUSTERS)) {
            NamedList nl = new NamedList();
            result.add("all", nl);

            Map<Integer, Cluster> clusterMap = lastSuccessful.clusters;
            if (clusterMap == null) {
                //we aren't caching, so load 'em up
                try {
                    clusterMap = loadClusters(lastSuccessful);
                } catch (Exception e) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                            "unable to load the clusters from " + lastSuccessful);
                }
            }

            for (Cluster cluster : clusterMap.values()) {
                NamedList clusterNL = new NamedList();
                nl.add(String.valueOf(cluster.getId()), clusterNL);
                clusterNL.add("numPoints", cluster.getNumPoints());
                //TODO: better format?
                clusterNL.add("center", cluster.getCenter().asFormatString());
                if (cluster.getRadius() != null) {
                    clusterNL.add("radius", cluster.getRadius().asFormatString());
                }
                if (includePoints) {
                    List<String> points = toPoints.get(cluster.getId());
                    clusterNL.add("points", points);
                }
            }
        }

        if (docId != null) {

        }
        //TODO: support sending in multiple ids

        if (clusterId != Integer.MIN_VALUE) {
            List<String> points = lastSuccessful.clusterIdToPoints.get(clusterId);
            if (points != null) {
                result.add(String.valueOf(clusterId), points);
            }
        }
    } else if (params.getBool(BUILD, false)) {
        RefCounted<SolrIndexSearcher> refCnt = core.getSearcher();
        int theK = params.getInt(K, 10);
        cluster(refCnt.get(), theK);
        refCnt.decref();
    }
    return result;
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

public static BasicDBList getBsonFromSequenceFile(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws SAXException, IOException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SequenceFileDirIterable<? extends Writable, ? extends Writable> seqFileDir = new SequenceFileDirIterable(
            pathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);

    // Very basic, only allow top level, 1 level of nesting, and field removal
    HashSet<String> fieldLookup = null;
    if (null != fields) {
        fieldLookup = new HashSet<String>();
        String[] fieldArray = fields.split(",");
        for (String field : fieldArray) {
            String[] fieldDecomp = field.split(":");
            fieldLookup.add(fieldDecomp[0]);
        }
    } //TOTEST

    int nRecords = 0;
    for (Pair<? extends Writable, ? extends Writable> record : seqFileDir) {
        BasicDBObject element = new BasicDBObject();

        // KEY

        Writable key = record.getFirst();
        if (key instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) key;
            element.put("key", writable.toString());
        } else if (key instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) key;
            element.put("key", Double.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) key;
            element.put("key", Integer.toString(writable.get()));
        } else if (key instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) key;
            element.put("key", Long.toString(writable.get()));
        } else if (key instanceof BSONWritable) {
            element.put("key", MongoDbUtil.convert((BSONWritable) key));
        }

        // VALUE

        Writable value = record.getSecond();
        if (value instanceof org.apache.hadoop.io.Text) {
            org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text) value;
            element.put("value", writable.toString());
        } else if (value instanceof org.apache.hadoop.io.DoubleWritable) {
            org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable) value;
            element.put("value", Double.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.IntWritable) {
            org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable) value;
            element.put("value", Integer.toString(writable.get()));
        } else if (value instanceof org.apache.hadoop.io.LongWritable) {
            org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable) value;
            element.put("value", Long.toString(writable.get()));
        } else if (value instanceof BSONWritable) {
            element.put("value", MongoDbUtil.convert((BSONWritable) value));
        } else if (value instanceof org.apache.mahout.math.VectorWritable) {
            Vector vec = ((org.apache.mahout.math.VectorWritable) value).get();
            BasicDBList dbl2 = listFromMahoutVector(vec, "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.classify.WeightedVectorWritable) {
            org.apache.mahout.clustering.classify.WeightedVectorWritable vecW = (org.apache.mahout.clustering.classify.WeightedVectorWritable) value;
            element.put("valueWeight", vecW.getWeight());
            BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
            element.put("value", dbl2);
        } else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
            Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable) value).getValue();
            BasicDBObject clusterVal = new BasicDBObject();
            clusterVal.put("center", listFromMahoutVector(cluster.getCenter(), "center", clusterVal));
            clusterVal.put("radius", listFromMahoutVector(cluster.getRadius(), "radius", clusterVal));
            element.put("value", clusterVal);
        } else {
            element.put("unknownValue", value.getClass().toString());
        }

        // Check the fields settings:
        // Only handle a few...
        if (null != fieldLookup) {
            for (String fieldToRemove : fieldLookup) {
                if (fieldToRemove.startsWith("value.")) {
                    fieldToRemove = fieldToRemove.substring(6);
                    BasicDBObject nested = (BasicDBObject) element.get("value.");
                    if (null != nested) {
                        nested.remove(fieldToRemove);
                    }
                } else {
                    element.remove(fieldToRemove);
                }
            } //TOTEST
        }

        dbl.add(element);
        nRecords++;
        if ((nLimit > 0) && (nRecords >= nLimit)) {
            break;
        }
    }

    return dbl;
}

From source file:com.modofo.molo.cluster.DisplayClustering.java

License:Apache License

protected static List<Cluster> readClustersWritable(Path clustersIn) {
    List<Cluster> clusters = Lists.newArrayList();
    Configuration conf = new Configuration();
    for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
            PathFilters.logsCRCFilter(), conf)) {
        Cluster cluster = value.getValue();
        log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}",
                new Object[] { cluster.getId(), AbstractCluster.formatVector(cluster.getCenter(), null),
                        cluster.getNumObservations(),
                        AbstractCluster.formatVector(cluster.getRadius(), null) });
        clusters.add(cluster);
    }
    return clusters;
}

From source file:io.github.thushear.display.DisplayClustering.java

License:Apache License

protected static void plotClusters(Graphics2D g2) {
    int cx = CLUSTERS.size() - 1;
    for (List<Cluster> clusters : CLUSTERS) {
        g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
        g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, cx--)]);
        for (Cluster cluster : clusters) {
            plotEllipse(g2, cluster.getCenter(), cluster.getRadius().times(3));
        }
    }
}

From source file:io.github.thushear.display.DisplayClustering.java

License:Apache License

protected static List<Cluster> readClusters(Path clustersIn) {
    List<Cluster> clusters = new ArrayList<Cluster>();
    Configuration conf = new Configuration();
    for (Cluster value : new SequenceFileDirValueIterable<Cluster>(clustersIn, PathType.LIST,
            PathFilters.logsCRCFilter(), conf)) {
        log.info("Reading Cluster:{} center:{} numPoints:{} radius:{}",
                new Object[] { value.getId(), AbstractCluster.formatVector(value.getCenter(), null),
                        value.getNumPoints(), AbstractCluster.formatVector(value.getRadius(), null) });
        clusters.add(value);
    }
    return clusters;
}

From source file:net.aprendizajengrande.ontocluster.Clusterer.java

License:Open Source License

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

    if (args.length != 3) {
        System.err.println(
                "Usage: <input hdfs folder with vectors> <hdfs folder for output> <local folder for output>");
        System.exit(1);
    }

    Configuration conf = new Configuration();
    DistanceMeasure measure = new CosineDistanceMeasure();
    long seed = 67241;
    int numClusters = 250;
    int numIterations = 500;

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // crear vectores en HDFS
    System.out.println("Input: " + args[0]);
    Path input = new Path(args[0] + "/input");

    // first centroids are an input parameter to clustering
    Path clusters = new Path(args[0] + "/clusters");
    clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, numClusters, measure, seed);

    Path output = new Path(args[1]);

    // cluster
    KMeansDriver.run(input, clusters, output, 0.005, numIterations, true, 0.0, false);

    // read the rel names, to pretty print

    Path inputRels = new Path(args[0] + "/rels");
    FileSystem fs = inputRels.getFileSystem(conf);
    FSDataInputStream fsdis = fs.open(inputRels);
    BufferedReader br = new BufferedReader(new InputStreamReader(fsdis));
    String line = br.readLine();
    Map<Integer, String> relIdToName = new HashMap<>();
    while (line != null) {
        String[] parts = line.split("\\t");
        relIdToName.put(Integer.parseInt(parts[0]), parts[1]);
        line = br.readLine();
    }

    // read output
    Path outputFinal = ClusterExtractor.findFinalClusters(args[1], conf);
    if (outputFinal == null) {
        System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'");
        System.exit(1);
    }
    Path successFile = new Path(outputFinal, "_SUCCESS");
    if (fs.exists(successFile)) {
        fs.delete(successFile, false);
    }

    SequenceFileDirIterable<Text, Writable> it = new SequenceFileDirIterable<>(outputFinal, PathType.LIST,
            conf);

    PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2])));

    int clusterNum = 0;
    for (Pair<Text, Writable> p : it) {
        Object obj = p.getSecond();
        if (!(obj instanceof ClusterWritable))
            continue;
        pw.println(clusterNum + ") " + p.getFirst());
        Cluster cluster = ((ClusterWritable) obj).getValue();
        Vector center = cluster.getCenter();
        for (int i = 0; i < center.size(); i++) {
            String name = relIdToName.get(i);
            if (name == null)
                name = "?";
            if (center.get(i) >= 0.01)
                pw.println("\t" + name + ": " + center.get(i));
        }
        pw.println();
        clusterNum++;
    }
    pw.close();
}

From source file:net.aprendizajengrande.ontocluster.ClusterExtractor.java

License:Open Source License

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

    if (args.length != 3) {
        System.err.println(
                "Usage: <input hdfs folder with rels> <hdfs folder for output> <local folder for output>");
        System.exit(1);
    }

    Configuration conf = new Configuration();

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // crear vectores en HDFS
    System.out.println("Input: " + args[0]);

    // read the rel names, to pretty print

    Path inputRels = new Path(args[0] + "/rels");
    FileSystem fs = inputRels.getFileSystem(conf);
    FSDataInputStream fsdis = fs.open(inputRels);
    BufferedReader br = new BufferedReader(new InputStreamReader(fsdis));
    String line = br.readLine();
    Map<Integer, String> relIdToName = new HashMap<>();
    while (line != null) {
        String[] parts = line.split("\\t");
        relIdToName.put(Integer.parseInt(parts[0]), parts[1]);
        line = br.readLine();
    }

    // read output
    Path outputFinal = findFinalClusters(args[1], conf);
    if (outputFinal == null) {
        System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'");
        System.exit(1);
    }

    // delete the _SUCCESS file as it is problematic
    // see
    // http://stackoverflow.com/questions/10752708/eofexception-at-org-apache-hadoop-io-sequencefilereader-initsequencefile-java
    Path successFile = new Path(outputFinal, "_SUCCESS");
    if (fs.exists(successFile)) {
        fs.delete(successFile, false);
    }

    SequenceFileDirIterable<Text, Writable> it = new SequenceFileDirIterable<>(outputFinal, PathType.LIST,
            conf);

    PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2])));

    int clusterNum = 0;
    for (Pair<Text, Writable> p : it) {
        Object obj = p.getSecond();
        if (!(obj instanceof ClusterWritable))
            continue;
        pw.println(clusterNum + ") " + p.getFirst());
        Cluster cluster = ((ClusterWritable) obj).getValue();
        Vector center = cluster.getCenter();
        for (int i = 0; i < center.size(); i++) {
            String name = relIdToName.get(i);
            if (name == null)
                name = "?";
            if (center.get(i) >= 0.01)
                pw.println("\t" + name + ": " + center.get(i));
        }
        pw.println();
        clusterNum++;
    }
    pw.close();
}

From source file:org.conan.mymahout.clustering.display.DisplayCanopy.java

License:Apache License

protected static void plotClusters(Graphics2D g2) {
    int cx = CLUSTERS.size() - 1;
    for (List<Cluster> clusters : CLUSTERS) {
        for (Cluster cluster : clusters) {
            if (isSignificant(cluster)) {
                g2.setStroke(new BasicStroke(1));
                g2.setColor(Color.BLUE);
                double[] t1 = { T1, T1 };
                plotEllipse(g2, cluster.getCenter(), new DenseVector(t1));
                double[] t2 = { T2, T2 };
                plotEllipse(g2, cluster.getCenter(), new DenseVector(t2));
                g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, cx)]);
                g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
                plotEllipse(g2, cluster.getCenter(), cluster.getRadius().times(3));
            }
        }
        cx--;
    }
}