List of usage examples for the org.apache.mahout.math.CardinalityException constructor
public CardinalityException(int expected, int cardinality)
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.cpu.MatrixMultiplicationCpu.java
License:Apache License
@Override public int run(String[] strings) throws Exception { addOption("numRowsA", "nra", "Number of rows of the first input matrix", true); addOption("numColsA", "nca", "Number of columns of the first input matrix", true); addOption("numRowsB", "nrb", "Number of rows of the second input matrix", true); addOption("numColsB", "ncb", "Number of columns of the second input matrix", true); addOption("debug", "db", "Enable debugging (true|false)", false); Map<String, List<String>> argMap = parseArguments(strings); if (argMap == null) { return -1; }//from www. j av a 2 s. co m int numRowsA = Integer.parseInt(getOption("numRowsA")); int numColsA = Integer.parseInt(getOption("numColsA")); int numRowsB = Integer.parseInt(getOption("numRowsB")); int numColsB = Integer.parseInt(getOption("numColsB")); boolean isDebugging = Boolean.parseBoolean(getOption("debug")); LOG.info("numRowsA: " + numRowsA); LOG.info("numColsA: " + numColsA); LOG.info("numRowsB: " + numRowsB); LOG.info("numColsB: " + numColsB); LOG.info("isDebugging: " + isDebugging); LOG.info("outputPath: " + OUTPUT_DIR); if (numColsA != numRowsB) { throw new CardinalityException(numColsA, numRowsB); } Configuration conf = new Configuration(getConf()); // Create random DistributedRowMatrix // use constant seeds to get reproducable results // Matrix A is stored transposed DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_TRANSPOSED_PATH, true); DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L), MATRIX_B_PATH, false); // Load DistributedRowMatrix a and b DistributedRowMatrix aTransposed = new DistributedRowMatrix(MATRIX_A_TRANSPOSED_PATH, OUTPUT_DIR, numRowsA, numColsA); aTransposed.setConf(conf); DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB); b.setConf(conf); // MatrixMultiply all within a new MapReduce job long startTime = System.currentTimeMillis(); 
DistributedRowMatrix c = aTransposed.multiplyMapReduce(b, MATRIX_C_PATH, false, true, 0, isDebugging); System.out.println("MatrixMultiplicationCpu using Hadoop finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); // Verification // Overwrite matrix A, NOT transposed for verification check DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, false); DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH); if (c.verify(d)) { System.out.println("Verify PASSED!"); } else { System.out.println("Verify FAILED!"); } if (isDebugging) { System.out.println("Matrix A:"); a.printDistributedRowMatrix(); System.out.println("Matrix A transposed:"); aTransposed.printDistributedRowMatrix(); System.out.println("Matrix B:"); b.printDistributedRowMatrix(); System.out.println("Matrix C:"); c.printDistributedRowMatrix(); System.out.println("Matrix D:"); d.printDistributedRowMatrix(); printOutput(conf); } return 0; }
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/** * This implements matrix this.transpose().times(other) * /* w w w . jav a 2 s . co m*/ * @param other a DistributedRowMatrix * @param outPath path to write result to * @return a DistributedRowMatrix containing the product */ public DistributedRowMatrix times(DistributedRowMatrix other, Path outPath) throws IOException { if (numRows != other.numRows()) { throw new CardinalityException(numRows, other.numRows()); } Configuration initialConf = getConf() == null ? new Configuration() : getConf(); Configuration conf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, rowPath, other.rowPath, outPath, other.numCols); JobClient.runJob(new JobConf(conf)); DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols()); out.setConf(conf); return out; }
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/** * This implements matrix multiplication A * B using MapReduce tasks on CPU or * GPU/*from w w w . ja v a 2 s. c om*/ * * @param other a DistributedRowMatrix * @param outPath path to write result to * @param useGPU use GPU or CPU (default: false, use CPU) * @return a DistributedRowMatrix containing the product */ public DistributedRowMatrix multiplyMapReduce(DistributedRowMatrix other, Path outPath, boolean useGPU, boolean isMatrixATransposed, int tileWidth, boolean isDebugging) throws IOException { // Check if cols of MatrixA = rows of MatrixB // (l x m) * (m x n) = (l x n) if (numCols != other.numRows()) { throw new CardinalityException(numCols, other.numRows()); } Configuration initialConf = (getConf() == null) ? new Configuration() : getConf(); // Transpose Matrix within a new MapReduce Job DistributedRowMatrix transposed = this; if (!isMatrixATransposed) { transposed = transposed.transpose(); } // Debug // System.out.println("DistributedRowMatrix transposed:"); // transposed.printDistributedRowMatrix(); // Build MatrixMultiplication job configuration Configuration conf = null; if (!useGPU) { conf = MatrixMultiplicationCpu.createMatrixMultiplicationCpuConf(initialConf, transposed.rowPath, other.rowPath, outPath, other.numCols, isDebugging); } else { // use GPU conf = MatrixMultiplicationGpu.createMatrixMultiplicationGpuConf(initialConf, transposed.rowPath, other.rowPath, outPath, other.numCols, tileWidth, isDebugging); } // Multiply Matrix with transposed one JobClient.runJob(new JobConf(conf)); // Read resulting Matrix from HDFS DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows, other.numCols()); out.setConf(conf); return out; }
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/** * This implements matrix multiplication A * B in Java without using MapReduce * tasks//from ww w .jav a 2 s .com * * @param other a DistributedRowMatrix * @param outPath path to write result to * * @return a DistributedRowMatrix containing the product */ public DistributedRowMatrix multiplyJava(DistributedRowMatrix other, Path outPath) throws IOException { // Check if cols of MatrixA = rows of MatrixB // (l x m) * (m x n) = (l x n) if (numCols != other.numRows()) { throw new CardinalityException(numCols, other.numRows()); } // Multiply Matrix with transposed one without new MapReduce Job final double[][] matrixA = this.toDoubleArray(); final double[][] matrixB = other.toDoubleArray(); final double[][] matrixC = new double[this.numRows][other.numCols]; int m = this.numRows; int n = this.numCols; int p = other.numCols; for (int k = 0; k < n; k++) { for (int i = 0; i < m; i++) { for (int j = 0; j < p; j++) { matrixC[i][j] = matrixC[i][j] + matrixA[i][k] * matrixB[k][j]; } } } // Save resulting Matrix to HDFS try { writeDistributedRowMatrix(this.conf, matrixC, this.numRows, other.numCols, outPath, false); } catch (Exception e) { e.printStackTrace(); } // Read resulting Matrix from HDFS DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols()); out.setConf(conf); return out; }
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationGpu.java
License:Apache License
@Override public int run(String[] strings) throws Exception { addOption("numRowsA", "nra", "Number of rows of the first input matrix", true); addOption("numColsA", "nca", "Number of columns of the first input matrix", true); addOption("numRowsB", "nrb", "Number of rows of the second input matrix", true); addOption("numColsB", "ncb", "Number of columns of the second input matrix", true); addOption("tileWidth", "tw", "TileWidth denotes the size of a submatrix", true); addOption("debug", "db", "Enable debugging (true|false)", false); Map<String, List<String>> argMap = parseArguments(strings); if (argMap == null) { return -1; }// ww w. jav a2 s . c o m int numRowsA = Integer.parseInt(getOption("numRowsA")); int numColsA = Integer.parseInt(getOption("numColsA")); int numRowsB = Integer.parseInt(getOption("numRowsB")); int numColsB = Integer.parseInt(getOption("numColsB")); // TILE_WITH = 32 // --> 2 * 32 = 1024 threads matches the blocksize int tileWidth = Integer.parseInt(getOption("tileWidth")); boolean isDebugging = Boolean.parseBoolean(getOption("debug")); LOG.info("numRowsA: " + numRowsA); LOG.info("numColsA: " + numColsA); LOG.info("numRowsB: " + numRowsB); LOG.info("numColsB: " + numColsB); LOG.info("tileWidth: " + tileWidth); LOG.info("isDebugging: " + isDebugging); LOG.info("outputPath: " + OUTPUT_DIR); if (numColsA != numRowsB) { throw new CardinalityException(numColsA, numRowsB); } Configuration conf = new Configuration(getConf()); // Create random DistributedRowMatrix // use constant seeds to get reproducable results // Matrix A is stored transposed DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_TRANSPOSED_PATH, true); DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L), MATRIX_B_PATH, false); // Load DistributedRowMatrix a and b DistributedRowMatrix aTransposed = new DistributedRowMatrix(MATRIX_A_TRANSPOSED_PATH, OUTPUT_DIR, numRowsA, numColsA); 
aTransposed.setConf(conf); DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB); b.setConf(conf); // MatrixMultiply all within a new MapReduce job long startTime = System.currentTimeMillis(); DistributedRowMatrix c = aTransposed.multiplyMapReduce(b, MATRIX_C_PATH, true, true, tileWidth, isDebugging); System.out.println("MatrixMultiplicationGpu using Hadoop finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); // Verification // Overwrite matrix A, NOT transposed for verification check DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, false); DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH); if (c.verify(d)) { System.out.println("Verify PASSED!"); } else { System.out.println("Verify FAILED!"); } if (isDebugging) { System.out.println("Matrix A:"); a.printDistributedRowMatrix(); System.out.println("Matrix A transposed:"); aTransposed.printDistributedRowMatrix(); System.out.println("Matrix B:"); b.printDistributedRowMatrix(); System.out.println("Matrix C:"); c.printDistributedRowMatrix(); System.out.println("Matrix D:"); d.printDistributedRowMatrix(); printOutput(conf); } return 0; }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java
License:Apache License
public static void main(String[] args) throws Exception { // Defaults//from ww w . jav a 2 s .c om int numRowsA = 10; int numColsA = 10; int numRowsB = 10; int numColsB = 10; boolean isDebugging = false; Configuration conf = new HamaConfiguration(); BSPJobClient jobClient = new BSPJobClient(conf); ClusterStatus cluster = jobClient.getClusterStatus(true); if (args.length > 0) { if (args.length == 6) { conf.setInt("bsp.peers.num", Integer.parseInt(args[0])); numRowsA = Integer.parseInt(args[1]); numColsA = Integer.parseInt(args[2]); numRowsB = Integer.parseInt(args[3]); numColsB = Integer.parseInt(args[4]); isDebugging = Boolean.parseBoolean(args[5]); } else { System.out.println("Wrong argument size!"); System.out.println(" Argument1=numBspTask"); System.out.println(" Argument2=numRowsA | Number of rows of the first input matrix"); System.out.println(" Argument3=numColsA | Number of columns of the first input matrix"); System.out.println(" Argument4=numRowsB | Number of rows of the second input matrix"); System.out.println(" Argument5=numColsB | Number of columns of the second input matrix"); System.out.println(" Argument6=debug | Enable debugging (true|false)"); return; } } else { conf.setInt("bsp.peers.num", cluster.getMaxTasks()); } conf.setInt("matrixmultiplication.bsp.cpu.numRowsA", numRowsA); conf.setInt("matrixmultiplication.bsp.cpu.numColsA", numColsA); conf.setInt("matrixmultiplication.bsp.cpu.numRowsB", numRowsB); conf.setInt("matrixmultiplication.bsp.cpu.numColsB", numRowsB); conf.setBoolean(DEBUG, isDebugging); LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0)); LOG.info("numRowsA: " + numRowsA); LOG.info("numColsA: " + numColsA); LOG.info("numRowsB: " + numRowsB); LOG.info("numColsB: " + numColsB); LOG.info("isDebugging: " + isDebugging); LOG.info("outputPath: " + OUTPUT_DIR); if (numColsA != numRowsB) { throw new CardinalityException(numColsA, numRowsB); } // Create random DistributedRowMatrix // use constant seeds to get reproducable results // 
Matrix A is stored transposed DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, true); DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L), MATRIX_B_PATH, false); // Load DistributedRowMatrix a and b DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB); b.setConf(conf); // MatrixMultiply all within a new BSP job long startTime = System.currentTimeMillis(); DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH, false, false); System.out.println("MatrixMultiplicationCpu using Hama finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); // Verification // Overwrite matrix A, NOT transposed for verification check DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, false); a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH); if (c.verify(d)) { System.out.println("Verify PASSED!"); } else { System.out.println("Verify FAILED!"); } if (isDebugging) { System.out.println("Matrix A:"); a.printDistributedRowMatrix(); System.out.println("Matrix B:"); b.printDistributedRowMatrix(); System.out.println("Matrix C:"); c.printDistributedRowMatrix(); System.out.println("Matrix D:"); d.printDistributedRowMatrix(); printOutput(conf); } }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.gpu.MatrixMultiplicationBSPGpu.java
License:Apache License
public static void main(String[] args) throws Exception { // Defaults/*from w ww .jav a2 s . com*/ int numRowsA = 10; int numColsA = 10; int numRowsB = 10; int numColsB = 10; boolean isDebugging = false; Configuration conf = new HamaConfiguration(); BSPJobClient jobClient = new BSPJobClient(conf); ClusterStatus cluster = jobClient.getClusterStatus(true); if (args.length > 0) { if (args.length == 6) { conf.setInt("bsp.peers.num", Integer.parseInt(args[0])); numRowsA = Integer.parseInt(args[1]); numColsA = Integer.parseInt(args[2]); numRowsB = Integer.parseInt(args[3]); numColsB = Integer.parseInt(args[4]); isDebugging = Boolean.parseBoolean(args[5]); } else { System.out.println("Wrong argument size!"); System.out.println(" Argument1=numBspTask"); System.out.println(" Argument2=numRowsA | Number of rows of the first input matrix"); System.out.println(" Argument3=numColsA | Number of columns of the first input matrix"); System.out.println(" Argument4=numRowsB | Number of rows of the second input matrix"); System.out.println(" Argument5=numColsB | Number of columns of the second input matrix"); System.out.println(" Argument6=debug | Enable debugging (true|false)"); return; } } else { conf.setInt("bsp.peers.num", cluster.getMaxTasks()); } conf.setInt("matrixmultiplication.bsp.gpu.numRowsA", numRowsA); conf.setInt("matrixmultiplication.bsp.gpu.numColsA", numColsA); conf.setInt("matrixmultiplication.bsp.gpu.numRowsB", numRowsB); conf.setInt("matrixmultiplication.bsp.gpu.numColsB", numRowsB); conf.setBoolean(DEBUG, isDebugging); LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0)); LOG.info("numRowsA: " + numRowsA); LOG.info("numColsA: " + numColsA); LOG.info("numRowsB: " + numRowsB); LOG.info("numColsB: " + numColsB); LOG.info("isDebugging: " + isDebugging); LOG.info("outputPath: " + OUTPUT_DIR); if (numColsA != numRowsB) { throw new CardinalityException(numColsA, numRowsB); } // Create random DistributedRowMatrix // use constant seeds to get reproducable results // 
Matrix A is stored transposed DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, true); DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L), MATRIX_B_PATH, false); // Load DistributedRowMatrix a and b DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB); b.setConf(conf); // MatrixMultiply all within a new BSP job long startTime = System.currentTimeMillis(); DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH, true, false); System.out.println("MatrixMultiplicationCpu using Hama finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); // Verification // Overwrite matrix A, NOT transposed for verification check DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, false); a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH); if (c.verify(d)) { System.out.println("Verify PASSED!"); } else { System.out.println("Verify FAILED!"); } if (isDebugging) { System.out.println("Matrix A:"); a.printDistributedRowMatrix(); System.out.println("Matrix B:"); b.printDistributedRowMatrix(); System.out.println("Matrix C:"); c.printDistributedRowMatrix(); System.out.println("Matrix D:"); d.printDistributedRowMatrix(); printOutput(conf); } }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java
License:Apache License
/** * This implements matrix multiplication A * B using MapReduce tasks on CPU or * GPU/*from www .j a v a 2 s . com*/ * * @param other a DistributedRowMatrix * @param outPath path to write result to * @param useGPU use GPU or CPU (default: false, use CPU) * @return a DistributedRowMatrix containing the product */ public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU, boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException { // Check if cols of MatrixA = rows of MatrixB // (l x m) * (m x n) = (l x n) if (numCols != other.numRows()) { throw new CardinalityException(numCols, other.numRows()); } Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf(); // Transpose Matrix within a new MapReduce Job DistributedRowMatrix transposed = this; if (transposeMatrixA) { transposed = transposed.transpose(); } // Debug // System.out.println("DistributedRowMatrix transposed:"); // transposed.printDistributedRowMatrix(); // Build MatrixMultiplication job configuration BSPJob job = null; if (!useGPU) { job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath, other.rowPath, outPath.getParent(), other.numCols); } else { // use GPU job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath, other.rowPath, outPath.getParent(), other.numCols); } // Multiply Matrix with transposed one if (job.waitForCompletion(true)) { // Rename result file to output path Configuration conf = job.getConfiguration(); FileSystem fs = outPath.getFileSystem(conf); FileStatus[] files = fs.listStatus(outPath.getParent()); for (int i = 0; i < files.length; i++) { if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) { fs.rename(files[i].getPath(), outPath); break; } } // Read resulting Matrix from HDFS DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, 
this.numRows, other.numCols()); out.setConf(conf); return out; } return null; }
From source file:com.twitter.algebra.matrix.multiply.ABInnerHDFSBroadcastOfB.java
License:Apache License
/** * Perform A x B, where A and B are already wrapped in a DistributedRowMatrix * object. Refer to {@link ABInnerHDFSBroadcastOfB} for further details. * /*from www .ja v a2 s . com*/ * @param conf the initial configuration * @param A matrix A * @param B matrix B * @param label the label for the output directory * @return AxB wrapped in a DistributedRowMatrix object * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, DistributedRowMatrix B, String label) throws IOException, InterruptedException, ClassNotFoundException { log.info("running " + ABInnerHDFSBroadcastOfB.class.getName()); if (A.numCols() != B.numRows()) { throw new CardinalityException(A.numCols(), B.numRows()); } Path outPath = new Path(A.getOutputTempPath(), label); FileSystem fs = FileSystem.get(outPath.toUri(), conf); ABInnerHDFSBroadcastOfB job = new ABInnerHDFSBroadcastOfB(); if (!fs.exists(outPath)) { job.run(conf, A.getRowPath(), B.getRowPath(), outPath, B.numRows(), B.numCols()); } else { log.warn("----------- Skip already exists: " + outPath); } DistributedRowMatrix distRes = new DistributedRowMatrix(outPath, A.getOutputTempPath(), A.numRows(), B.numCols()); distRes.setConf(conf); return distRes; }
From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java
License:Apache License
/** * Perform A x B, where A and B are already wrapped in a DistributedRowMatrix * object. Refer to {@link ABOuterHDFSBroadcastOfA} for further details. * /*from w w w .j av a 2 s . co m*/ * @param conf * the initial configuration * @param A * matrix A * @param B * matrix B * @param label * the label for the output directory * @return AxB wrapped in a DistributedRowMatrix object * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, DistributedRowMatrix B, String label) throws IOException, InterruptedException, ClassNotFoundException { log.info("running " + ABOuterHDFSBroadcastOfA.class.getName()); if (A.numCols() != B.numRows()) { throw new CardinalityException(A.numCols(), B.numRows()); } Path outPath = new Path(A.getOutputTempPath(), label); FileSystem fs = FileSystem.get(outPath.toUri(), conf); ABOuterHDFSBroadcastOfA job = new ABOuterHDFSBroadcastOfA(); if (!fs.exists(outPath)) { job.run(conf, A.getRowPath(), B.getRowPath(), outPath, A.numRows(), A.numCols()); } else { log.warn("----------- Skip already exists: " + outPath); } DistributedRowMatrix distRes = new DistributedRowMatrix(outPath, A.getOutputTempPath(), A.numRows(), B.numCols()); distRes.setConf(conf); return distRes; }