List of usage examples for the org.apache.mahout.math.DenseVector constructor
public DenseVector(Vector vector)
From source file:SimpleCsvExamples.java
License:Apache License
public static void main(String[] args) throws IOException { FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS]; for (int i = 0; i < FIELDS; i++) { encoder[i] = new ConstantValueEncoder("v" + 1); }//from w ww.j a v a 2 s. c o m OnlineSummarizer[] s = new OnlineSummarizer[FIELDS]; for (int i = 0; i < FIELDS; i++) { s[i] = new OnlineSummarizer(); } long t0 = System.currentTimeMillis(); Vector v = new DenseVector(1000); if ("--generate".equals(args[0])) { PrintWriter out = new PrintWriter( new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8)); try { int n = Integer.parseInt(args[1]); for (int i = 0; i < n; i++) { Line x = Line.generate(); out.println(x); } } finally { Closeables.close(out, false); } } else if ("--parse".equals(args[0])) { BufferedReader in = Files.newReader(new File(args[1]), Charsets.UTF_8); double total = 0; try { String line = in.readLine(); while (line != null) { v.assign(0); Line x = new Line(line); for (int i = 0; i < FIELDS; i++) { double z = x.getDouble(i); total += z; //s[i].add(x.getDouble(i)); encoder[i].addToVector(x.get(i), v); } line = in.readLine(); } } finally { Closeables.close(in, true); } // String separator = ""; // for (int i = 0; i < FIELDS; i++) { // System.out.printf("%s%.3f", separator, s[i].getMean()); // separator = ","; // } System.out.println("total: " + total); } else if ("--fast".equals(args[0])) { FastLineReader in = new FastLineReader(new FileInputStream(args[1])); double total = 0; try { FastLine line = in.read(); while (line != null) { v.assign(0); for (int i = 0; i < FIELDS; i++) { double z = line.getDouble(i); total += z; //s[i].add(z); encoder[i].addToVector((byte[]) null, z, v); } line = in.read(); } } finally { Closeables.close(in, true); } // String separator = ""; // for (int i = 0; i < FIELDS; i++) { // System.out.printf("%s%.3f", separator, s[i].getMean()); // separator = ","; // } System.out.println("total: " + total); } System.out.printf("\nElapsed time = %.3f%n", 
(System.currentTimeMillis() - t0) / 1000.0); }
From source file:DisplayClustering.java
License:Apache License
protected static void plotSampleParameters(Graphics2D g2) { Vector v = new DenseVector(2); Vector dv = new DenseVector(2); g2.setColor(Color.RED);//from ww w.ja va2 s. co m for (Vector param : SAMPLE_PARAMS) { v.set(0, param.get(0)); v.set(1, param.get(1)); dv.set(0, param.get(2) * 3); dv.set(1, param.get(3) * 3); plotEllipse(g2, v, dv); } }
From source file:DisplayClustering.java
License:Apache License
protected static void plotSampleData(Graphics2D g2) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); // plot the axes g2.setColor(Color.BLACK);// w w w. j av a 2 s .c om Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data g2.setColor(Color.DARK_GRAY); dv.assign(0.03); for (VectorWritable v : SAMPLE_DATA) { plotRectangle(g2, v.get(), dv); } }
From source file:DisplayClustering.java
License:Apache License
/** * This method plots points and colors them according to their cluster * membership, rather than drawing ellipses. * * As of commit, this method is used only by K-means spectral clustering. * Since the cluster assignments are set within the eigenspace of the data, it * is not inherent that the original data cluster as they would in K-means: * that is, as symmetric gaussian mixtures. * * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw * output is not directly usable. Rather, the cluster assignments from the raw * output need to be transferred back to the original data. As such, this * method will read the SequenceFile cluster results of K-means and transfer * the cluster assignments to the original data, coloring them appropriately. * * @param g2/*w ww.j a v a2s .c o m*/ * @param data */ protected static void plotClusteredSampleData(Graphics2D g2, Path data) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); g2.setColor(Color.BLACK); Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data, colored according to the cluster they belong to dv.assign(0.03); Path clusteredPointsPath = new Path(data, "clusteredPoints"); Path inputPath = new Path(clusteredPointsPath, "part-m-00000"); Map<Integer, Color> colors = new HashMap<Integer, Color>(); int point = 0; for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>( inputPath, new Configuration())) { int clusterId = record.getFirst().get(); VectorWritable v = SAMPLE_DATA.get(point++); Integer key = clusterId; if (!colors.containsKey(key)) { colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]); } plotClusteredRectangle(g2, v.get(), dv, colors.get(key)); } }
From source file:DisplayClustering.java
License:Apache License
/** * Identical to plotRectangle(), but with the option of setting the color of * the rectangle's stroke./*from www .j a v a 2 s.co m*/ * * NOTE: This should probably be refactored with plotRectangle() since most of * the code here is direct copy/paste from that method. * * @param g2 * A Graphics2D context. * @param v * A vector for the rectangle's center. * @param dv * A vector for the rectangle's dimensions. * @param color * The color of the rectangle's stroke. */ protected static void plotClusteredRectangle(Graphics2D g2, Vector v, Vector dv, Color color) { double[] flip = { 1, -1 }; Vector v2 = v.times(new DenseVector(flip)); v2 = v2.minus(dv.divide(2)); int h = SIZE / 2; double x = v2.get(0) + h; double y = v2.get(1) + h; g2.setStroke(new BasicStroke(1)); g2.setColor(color); g2.draw(new Rectangle2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS)); }
From source file:DisplayClustering.java
License:Apache License
/**
 * Draws a rectangle outline on the graphics context using its current stroke and color.
 *
 * @param g2
 *          a Graphics2D context
 * @param v
 *          a Vector of rectangle center
 * @param dv
 *          a Vector of rectangle dimensions
 */
protected static void plotRectangle(Graphics2D g2, Vector v, Vector dv) {
  // Negate the second component of the center, then step back by half the dimensions to
  // get the rectangle's origin corner.
  Vector flipSecond = new DenseVector(new double[] { 1, -1 });
  Vector corner = v.times(flipSecond).minus(dv.divide(2));

  int half = SIZE / 2;
  double originX = corner.get(0) + half;
  double originY = corner.get(1) + half;

  Rectangle2D outline =
      new Rectangle2D.Double(originX * DS, originY * DS, dv.get(0) * DS, dv.get(1) * DS);
  g2.draw(outline);
}
From source file:DisplayClustering.java
License:Apache License
/** * Draw an ellipse on the graphics context * * @param g2/*from ww w .j a v a2 s . co m*/ * a Graphics2D context * @param v * a Vector of ellipse center * @param dv * a Vector of ellipse dimensions */ protected static void plotEllipse(Graphics2D g2, Vector v, Vector dv) { double[] flip = { 1, -1 }; Vector v2 = v.times(new DenseVector(flip)); v2 = v2.minus(dv.divide(2)); int h = SIZE / 2; double x = v2.get(0) + h; double y = v2.get(1) + h; g2.draw(new Ellipse2D.Double(x * DS, y * DS, dv.get(0) * DS, dv.get(1) * DS)); }
From source file:DisplayClustering.java
License:Apache License
/** * Generate random samples and add them to the sampleData * * @param num/* w w w . j av a 2 s . c o m*/ * int number of samples to generate * @param mx * double x-value of the sample mean * @param my * double y-value of the sample mean * @param sd * double standard deviation of the samples */ protected static void generateSamples(int num, double mx, double my, double sd) { double[] params = { mx, my, sd, sd }; SAMPLE_PARAMS.add(new DenseVector(params)); log.info("Generating {} samples m=[{}, {}] sd={}", num, mx, my, sd); for (int i = 0; i < num; i++) { SAMPLE_DATA.add(new VectorWritable(new DenseVector( new double[] { UncommonDistributions.rNorm(mx, sd), UncommonDistributions.rNorm(my, sd) }))); } }
From source file:DisplayClustering.java
License:Apache License
/**
 * Generates random two-dimensional samples with a separate standard deviation per axis and
 * appends them to {@code SAMPLE_DATA}.
 *
 * <p>The generating parameters are recorded in {@code SAMPLE_PARAMS} as the vector
 * {@code [mx, my, sdx, sdy]}. Each sample's coordinates are drawn with
 * {@code UncommonDistributions.rNorm}, x first and then y.
 *
 * @param num
 *          int number of samples to generate
 * @param mx
 *          double x-value of the sample mean
 * @param my
 *          double y-value of the sample mean
 * @param sdx
 *          double x-value standard deviation of the samples
 * @param sdy
 *          double y-value standard deviation of the samples
 */
protected static void generate2dSamples(int num, double mx, double my, double sdx, double sdy) {
  SAMPLE_PARAMS.add(new DenseVector(new double[] { mx, my, sdx, sdy }));
  log.info("Generating {} samples m=[{}, {}] sd=[{}, {}]", num, mx, my, sdx, sdy);
  for (int remaining = num; remaining > 0; remaining--) {
    double x = UncommonDistributions.rNorm(mx, sdx);
    double y = UncommonDistributions.rNorm(my, sdy);
    Vector sample = new DenseVector(new double[] { x, y });
    SAMPLE_DATA.add(new VectorWritable(sample));
  }
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
public static void writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns, Path path, boolean saveTransposed) throws Exception { SequenceFile.Writer writer = null; try {/*from www .j a v a2 s . c o m*/ FileSystem fs = FileSystem.get(conf); writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class); if (saveTransposed) { // Transpose Matrix before saving double[][] transposed = new double[columns][rows]; for (int i = 0; i < rows; i++) { for (int j = 0; j < columns; j++) { transposed[j][i] = matrix[i][j]; } } matrix = transposed; } for (int i = 0; i < matrix.length; i++) { DenseVector rowVector = new DenseVector(matrix[i]); writer.append(new IntWritable(i), new VectorWritable(rowVector)); } } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) { try { writer.close(); } catch (IOException e) { e.printStackTrace(); } } } }