List of usage examples for org.apache.mahout.math.hadoop DistributedRowMatrix getRowPath
public Path getRowPath()
From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();//www . ja v a2 s. co m addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("numUsers", null, "number of users", true); addOption("numItems", null, "number of items", true); addOption("blockSize", null, "dfs block size.", false); //addOption("runIterations", null, "true or false for iterations", true); Map<String, String> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures")); numIterations = Integer.parseInt(parsedArgs.get("--numIterations")); lambda = Double.parseDouble(parsedArgs.get("--lambda")); alpha = Double.parseDouble(parsedArgs.get("--alpha")); implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback")); numUsers = Integer.parseInt(parsedArgs.get("--numUsers")); numItems = Integer.parseInt(parsedArgs.get("--numItems")); dfsBlockSize = getOption("blockSize") == null ? 
64 * 1024 * 1024 : Long.parseLong(getOption("blockSize")); /* * compute the factorization A = U M' * * where A (users x items) is the matrix of known ratings * U (users x features) is the representation of users in the feature space * M (items x features) is the representation of items in the feature space */ /* create A' */ Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class, ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); itemRatings.setCombinerClass(VectorSumReducer.class); itemRatings.waitForCompletion(true); //numItems = // (int) itemRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); log.info("Number of Items\t{}", numItems); /* create A */ Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); userRatings.setCombinerClass(MergeVectorsCombiner.class); userRatings.waitForCompletion(true); //numUsers = // (int) userRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); log.info("Number of Users\t{}", numUsers); /* count item per user */ Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnts"), SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class, IntWritable.class, UserItemCntsReducer.class, IntWritable.class, IntWritable.class, SequenceFileOutputFormat.class); userItemCntsJob.setJobName("user ratings count"); userItemCntsJob.setCombinerClass(UserItemCntsReducer.class); userItemCntsJob.waitForCompletion(true); //TODO this could be fiddled into one of the upper jobs Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"), AverageRatingMapper.class, IntWritable.class, 
VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); averageItemRatings.setCombinerClass(MergeVectorsCombiner.class); averageItemRatings.waitForCompletion(true); Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf()); /* create an initial M */ initializeM(averageRatings); for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) { DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1), getTempPath("Mtemp" + String.valueOf(currentIteration - 1)), numItems, numFeatures); curM.setConf(new Configuration()); DistributedRowMatrix YtransposeY = curM.times(curM); // broadcast M, read A row-wise, recompute U row-wise // log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations); runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1), YtransposeY.getRowPath(), numItems); DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration), getTempPath("Utmp" + String.valueOf(currentIteration)), numUsers, numFeatures); curU.setConf(new Configuration()); DistributedRowMatrix XtransposeX = curU.times(curU); // set up index of U // CreateMapFileFromSeq.createMapFile(pathToU(currentIteration)); // broadcast U, read A' row-wise, recompute M row-wise // log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations); runDistributedImplicitSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration), XtransposeX.getRowPath(), numUsers); } return 0; }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();//from www . j av a 2 s . c om addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("indexSizes", null, "index sizes Path", true); addOption("startIteration", null, "start iteration number", String.valueOf(0)); addOption("oldM", null, "old M matrix Path.", null); addOption("largeUserFeatures", null, "true if user x feature matrix is too large for memory", String.valueOf(true)); addOption("rmseCurve", null, "true if want to extract rmse curve", String.valueOf(true)); addOption("cleanUp", null, "true if want to clean up temporary matrix", String.valueOf(true)); addOption("useTransform", null, "true if using logarithm as transform", String.valueOf(true)); addOption("rateIndex", null, "0 based index for rate column in input file.", String.valueOf(2)); Map<String, String> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } try { /** step 0: fetch dimention of training set matrix. 
*/ Map<String, String> indexSizesTmp = ALSMatrixUtil.fetchTextFiles(new Path(getOption("indexSizes")), DELIMETER, Arrays.asList(0), Arrays.asList(1)); numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures")); numIterations = Integer.parseInt(parsedArgs.get("--numIterations")); lambda = Double.parseDouble(parsedArgs.get("--lambda")); alpha = Double.parseDouble(parsedArgs.get("--alpha")); implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback")); numUsers = Integer.parseInt(indexSizesTmp.get("0")); numItems = Integer.parseInt(indexSizesTmp.get("1")); numTaskTrackers = HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks; startIteration = Integer.parseInt(parsedArgs.get("--startIteration")); largeUserFeatures = Boolean.parseBoolean(getOption("largeUserFeatures")); useRMSECurve = Boolean.parseBoolean(getOption("rmseCurve")); cleanUp = Boolean.parseBoolean(getOption("cleanUp")); useTransform = Boolean.parseBoolean(getOption("useTransform")); rateIndex = Integer.parseInt(getOption("rateIndex")); FileSystem fs = FileSystem.get(getConf()); if (!fs.exists(pathToTransformed())) { if (useTransform) { // transform price into rating Job transformJob = prepareJob(getInputPath(), pathToTransformed(), TextInputFormat.class, TransformColumnValueMapper.class, NullWritable.class, Text.class, TextOutputFormat.class); transformJob.waitForCompletion(true); } else { FileUtil.copy(FileSystem.get(getConf()), getInputPath(), FileSystem.get(getConf()), pathToTransformed(), false, getConf()); } } /* if (getOption("oldM") != null) { runOnetimeSolver(pathToTransformed(), getOutputPath("U"), new Path(getOption("oldM"))); return 0; } */ /* * compute the factorization A = U M' * * where A (users x items) is the matrix of known ratings * U (users x features) is the representation of users in the feature space * M (items x features) is the representation of items in the feature space */ if (startIteration == 0) { if (!fs.exists(pathToItemRatings())) { // 
create A' Job itemRatings = prepareJob(pathToTransformed(), pathToItemRatings(), TextInputFormat.class, ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); itemRatings.setCombinerClass(VectorSumReducer.class); long matrixSizeExp = (long) (8L * numUsers * numFeatures * SAFE_MARGIN); long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (long) HadoopClusterUtil.MAP_TASKS_PER_NODE; int numTaskPerDataNode = Math.max(1, (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (double) matrixSizeExp)); //log.info("matrix Size: " + matrixSizeExp + ", memorhThreshold: " + memoryThreshold + ", numTaskPerDataNode: " + numTaskPerDataNode); if (matrixSizeExp > memoryThreshold) { //log.info("A: {}", numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf())); int numReducer = Math.min( numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()), HadoopClusterUtil.getMaxMapTasks(getConf())); //log.info("Number Of Reducer: " + numReducer); itemRatings.setNumReduceTasks(numReducer); } itemRatings.waitForCompletion(true); } if (!fs.exists(pathToUserRatings())) { Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); userRatings.setNumReduceTasks(HadoopClusterUtil.getNumberOfTaskTrackers(getConf())); userRatings.setCombinerClass(MergeVectorsCombiner.class); userRatings.setNumReduceTasks(HadoopClusterUtil.getMaxMapTasks(getConf())); userRatings.waitForCompletion(true); } if (!fs.exists(getOutputPath("userItemCnt"))) { // count item per user Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnt"), SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class, IntWritable.class, SequenceFileOutputFormat.class); userItemCntsJob.setJobName("user ratings 
count"); userItemCntsJob.waitForCompletion(true); } if (!fs.exists(getTempPath("averageRatings"))) { //TODO this could be fiddled into one of the upper jobs Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"), AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); averageItemRatings.setCombinerClass(MergeVectorsCombiner.class); averageItemRatings.waitForCompletion(true); } if (!fs.exists(new Path(pathToM(-1), "part-m-00000"))) { Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf()); /** create an initial M */ initializeM(averageRatings); } } for (int currentIteration = startIteration; currentIteration < numIterations; currentIteration++) { DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1), getTempPath("Mtemp/tmp-" + String.valueOf(currentIteration - 1) + "/M"), numItems, numFeatures); curM.setConf(getConf()); DistributedRowMatrix YtransposeY = curM.times(curM); /** broadcast M, read A row-wise, recompute U row-wise */ log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations); runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1), YtransposeY.getRowPath(), numItems, false); DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration), getTempPath("Utmp/tmp-" + String.valueOf(currentIteration) + "/U"), numUsers, numFeatures); curU.setConf(getConf()); DistributedRowMatrix XtransposeX = curU.times(curU); /** broadcast U, read A' row-wise, recompute M row-wise */ log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations); runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration), XtransposeX.getRowPath(), numUsers, largeUserFeatures); /** calculate rmse on each updated matrix U, M and decide to further iteration */ if (currentIteration > startIteration && useRMSECurve) { 
Pair<Integer, Double> UsquaredError = calculateMatrixDistanceSquared( pathToU(currentIteration - 1), pathToU(currentIteration), currentIteration); Pair<Integer, Double> MsquaredError = calculateMatrixDistanceSquared( pathToM(currentIteration - 1), pathToM(currentIteration), currentIteration); String currentRMSE = currentIteration + DELIMETER + UsquaredError.getFirst() + DELIMETER + UsquaredError.getSecond() + DELIMETER + MsquaredError.getFirst() + DELIMETER + MsquaredError.getSecond() + DefaultOptionCreator.NEWLINE; rmsePerIteration += currentRMSE; log.info("iteration {}: {}", currentIteration, currentRMSE); } if (currentIteration >= startIteration + 2 && cleanUp) { fs.deleteOnExit(pathToU(currentIteration - 2)); fs.deleteOnExit(pathToM(currentIteration - 2)); } } return 0; } catch (Exception e) { e.printStackTrace(); return -1; } finally { if (useRMSECurve) { HadoopClusterUtil.writeToHdfs(getConf(), getOutputPath("RMSE"), rmsePerIteration); } } }
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/*** * If the MapDir matrix is small, we can convert it to an in memory representation * and then run efficient centralized operations * /*from w ww. j a va2 s . c om*/ * @param origMtx in MapDir format (generated by MatrixOutputFormat) * @return a dense matrix including the data * @throws IOException */ public static DenseMatrix toDenseMatrix(DistributedRowMatrix origMtx) throws IOException { MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath()); DenseMatrix mtx = new DenseMatrix(origMtx.numRows(), origMtx.numCols()); Iterator<MatrixSlice> sliceIterator; try { sliceIterator = mapDir.iterateAll(); } catch (Exception e) { log.info(e.toString()); log.info("Input is not in matrix format, trying SequenceFileFormat instead ..."); sliceIterator = origMtx.iterateAll(); } while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); // int r = slice.index(); // for (int c = 0; c < mtx.numCols(); c++) { // mtx.set(r, c, slice.get(c)); // } mtx.viewRow(slice.index()).assign(slice.vector()); } mapDir.close(); return mtx; }
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/*** * If the MapDir matrix is small, we can convert it to an in memory representation * and then run efficient centralized operations * /*from ww w. j ava2s . c o m*/ * @param origMtx in MapDir format (generated by MatrixOutputFormat) * @return a dense matrix including the data * @throws IOException */ static SparseMatrix toSparseMatrix(DistributedRowMatrix origMtx) throws IOException { MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath()); SparseMatrix mtx = new SparseMatrix(origMtx.numRows(), origMtx.numCols()); Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll(); while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); mtx.viewRow(slice.index()).assign(slice.vector()); } mapDir.close(); return mtx; }
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Trace of a matrix obtained in a centralized way. For some reason, which I did
 * not have time to debug, raises memory exception for big matrices.
 *
 * TODO: MapReduce job for traces of big matrices.
 *
 * @param origMtx the input matrix in MapDir format
 * @return trace of the input matrix (sum of diagonal entries, NaNs counted as 0)
 * @throws IOException
 */
static double trace(DistributedRowMatrix origMtx) throws IOException {
    MapDir mapDir = new MapDir(new Configuration(), origMtx.getRowPath());
    // BUGFIX: close the MapDir in a finally block — previously it leaked if
    // iteration threw partway through
    try {
        Iterator<MatrixSlice> sliceIterator = mapDir.iterateAll();
        double trace = 0;
        while (sliceIterator.hasNext()) {
            MatrixSlice slice = sliceIterator.next();
            int index = slice.index();
            // rows past the square part of the matrix have no diagonal entry
            if (index >= slice.vector().size())
                break;
            double value = slice.vector().get(index);
            trace += Double.isNaN(value) ? 0 : value;
        }
        return trace;
    } finally {
        mapDir.close();
    }
}
From source file:com.twitter.algebra.matrix.format.Sequence2MatrixFormatJob.java
License:Apache License
/**
 * Converts matrix A from SequenceFile format to matrix format, reusing a
 * previously produced output directory when one already exists.
 *
 * @param conf the initial configuration
 * @param A the matrix to convert
 * @param label the label for the output directory
 * @return the converted matrix wrapped in a DistributedRowMatrix object
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + Sequence2MatrixFormatJob.class.getName());
    Path resultPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(resultPath.toUri(), conf);
    if (fs.exists(resultPath)) {
        log.warn("----------- Skip already exists: " + resultPath);
    } else {
        new Sequence2MatrixFormatJob().run(conf, A.getRowPath(), resultPath);
    }
    DistributedRowMatrix converted =
        new DistributedRowMatrix(resultPath, A.getOutputTempPath(), A.numRows(), A.numCols());
    converted.setConf(conf);
    return converted;
}
From source file:com.twitter.algebra.matrix.multiply.ABInnerHDFSBroadcastOfB.java
License:Apache License
/** * Perform A x B, where A and B are already wrapped in a DistributedRowMatrix * object. Refer to {@link ABInnerHDFSBroadcastOfB} for further details. * /*from w ww. j av a2 s.c om*/ * @param conf the initial configuration * @param A matrix A * @param B matrix B * @param label the label for the output directory * @return AxB wrapped in a DistributedRowMatrix object * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, DistributedRowMatrix B, String label) throws IOException, InterruptedException, ClassNotFoundException { log.info("running " + ABInnerHDFSBroadcastOfB.class.getName()); if (A.numCols() != B.numRows()) { throw new CardinalityException(A.numCols(), B.numRows()); } Path outPath = new Path(A.getOutputTempPath(), label); FileSystem fs = FileSystem.get(outPath.toUri(), conf); ABInnerHDFSBroadcastOfB job = new ABInnerHDFSBroadcastOfB(); if (!fs.exists(outPath)) { job.run(conf, A.getRowPath(), B.getRowPath(), outPath, B.numRows(), B.numCols()); } else { log.warn("----------- Skip already exists: " + outPath); } DistributedRowMatrix distRes = new DistributedRowMatrix(outPath, A.getOutputTempPath(), A.numRows(), B.numCols()); distRes.setConf(conf); return distRes; }
From source file:com.twitter.algebra.matrix.multiply.ABOuterHDFSBroadcastOfA.java
License:Apache License
/**
 * Perform A x B, where A and B are already wrapped in a DistributedRowMatrix
 * object. Refer to {@link ABOuterHDFSBroadcastOfA} for further details.
 *
 * Skips the MapReduce job when the labeled output directory already exists.
 *
 * @param conf the initial configuration
 * @param A matrix A
 * @param B matrix B
 * @param label the label for the output directory
 * @return AxB wrapped in a DistributedRowMatrix object
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, DistributedRowMatrix B,
        String label) throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + ABOuterHDFSBroadcastOfA.class.getName());
    if (A.numCols() != B.numRows()) {
        throw new CardinalityException(A.numCols(), B.numRows());
    }
    Path productPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(productPath.toUri(), conf);
    if (fs.exists(productPath)) {
        log.warn("----------- Skip already exists: " + productPath);
    } else {
        new ABOuterHDFSBroadcastOfA().run(conf, A.getRowPath(), B.getRowPath(), productPath, A.numRows(),
            A.numCols());
    }
    DistributedRowMatrix product =
        new DistributedRowMatrix(productPath, A.getOutputTempPath(), A.numRows(), B.numCols());
    product.setConf(conf);
    return product;
}
From source file:com.twitter.algebra.matrix.multiply.AtBOuterStaticMapsideJoinJob.java
License:Apache License
/**
 * Perform A' x B via a map-side join, where A and B are already wrapped in a
 * DistributedRowMatrix object.
 *
 * Skips the MapReduce job when the labeled output directory already exists.
 *
 * @param conf the initial configuration
 * @param A matrix A
 * @param B matrix B
 * @param label the label for the output directory
 * @return the product wrapped in a DistributedRowMatrix object
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, DistributedRowMatrix B,
        String label) throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + AtBOuterStaticMapsideJoinJob.class.getName());
    // the map-side join requires both inputs to have the same row count
    if (A.numRows() != B.numRows()) {
        throw new CardinalityException(A.numRows(), B.numRows());
    }
    Path productPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(productPath.toUri(), conf);
    if (fs.exists(productPath)) {
        log.warn("----------- Skip already exists: " + productPath);
    } else {
        new AtBOuterStaticMapsideJoinJob().run(conf, A.getRowPath(), B.getRowPath(), productPath,
            B.numCols());
    }
    DistributedRowMatrix product =
        new DistributedRowMatrix(productPath, A.getOutputTempPath(), A.numCols(), B.numCols());
    product.setConf(conf);
    return product;
}
From source file:com.twitter.algebra.matrix.multiply.AtB_DMJ.java
License:Apache License
/**
 * Perform A x B, where At and B are already wrapped in a DistributedRowMatrix
 * object. Refer to {@link AtB_DMJ} for further details.
 *
 * Automatically decide on partitioning the larger matrix to be used with
 * in-memory combiners.
 *
 * @param conf the initial configuration
 * @param At transpose of matrix A
 * @param B matrix B
 * @param label the label for the output directory
 * @param labelAtCol by using a fixed label for AtCol one can avoid the second
 *        run of the partitioning job if we know that At is not changed
 * @param lableBCol by using a fixed label for BCol one can avoid the second
 *        run of the partitioning job if we know that B is not changed
 * @return AxB wrapped in a DistributedRowMatrix object
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static DistributedRowMatrix smartRun(Configuration conf, DistributedRowMatrix At,
        DistributedRowMatrix B, String label, String labelAtCol, String lableBCol)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + AtB_DMJ.class.getName());
    // A' x B requires At and B to have the same number of rows
    if (At.numRows() != B.numRows())
        throw new CardinalityException(At.numRows(), B.numRows());
    Path outPath = new Path(At.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    AtB_DMJ job = new AtB_DMJ();
    if (!fs.exists(outPath)) {
        // default: neither side column-partitioned; exactly one side is
        // partitioned below based on estimated remote-read cost
        int numColPartitionsAt = 1, numColPartitionsB = 1;
        int numColPartitions = NMFCommon.computeOptColPartitionsForMemCombiner(conf, At.numCols(),
            B.numCols());
        long atSize = MapDir.du(At.getRowPath(), fs);
        long bSize = MapDir.du(B.getRowPath(), fs);
        // cost is size of remote reads: for each col partition we need to read
        // the entirety of the other matrix once
        long atPartitionCost = numColPartitions * bSize;
        long bPartitionCost = numColPartitions * atSize;
        log.info("smart partitioning: numColPartitions: " + numColPartitions + " atSize: " + atSize
            + " bSize: " + bSize + " atCost=" + atPartitionCost + " vs. bCost=" + bPartitionCost);
        // partition the side whose partitioning incurs the cheaper remote reads
        if (atPartitionCost < bPartitionCost) {
            At = ColPartitionJob.partition(At, conf, labelAtCol, numColPartitions);
            numColPartitionsAt = numColPartitions;
        } else {
            B = ColPartitionJob.partition(B, conf, lableBCol, numColPartitions);
            numColPartitionsB = numColPartitions;
        }
        job.run(conf, At.getRowPath(), B.getRowPath(), outPath, At.numCols(), B.numCols(),
            numColPartitionsAt, numColPartitionsB, true);
    } else {
        log.warn("----------- Skip already exists: " + outPath);
    }
    // NOTE(review): result dimensions are At.numCols() x B.numCols(), i.e. A x B
    // with A = At' — consistent with the other multiply jobs in this package
    DistributedRowMatrix distRes = new DistributedRowMatrix(outPath, At.getOutputTempPath(), At.numCols(),
        B.numCols());
    distRes.setConf(conf);
    return distRes;
}