List of usage examples for org.apache.hadoop.fs Path toUri
public URI toUri()
From source file:com.twitter.algebra.nmf.ReindexerJob.java
License:Apache License
/**
 * Runs the reindexing job on {@code input}, unless its output directory is already present.
 *
 * @param conf    Hadoop configuration used to resolve the file system and to run the job
 * @param input   path handed to the reindexing job as its input
 * @param tmpPath parent directory under which the output directory is created
 * @param label   name of the output directory beneath {@code tmpPath}
 * @return the value of the total-index counter narrowed to {@code int}, or {@code -1} when the
 *         output path already exists and the job is skipped
 * @throws IOException            if the file system lookup or the job fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static int index(Configuration conf, Path input, Path tmpPath, String label)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputPath = new Path(tmpPath, label);
  FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
  ReindexerJob job = new ReindexerJob();

  // Existing output is treated as the result of an earlier run and is not recomputed.
  if (fs.exists(outputPath)) {
    log.warn("----------- Skip already exists: " + outputPath);
    return -1;
  }

  Job finishedJob = job.run(conf, input, outputPath);
  long indexCount = finishedJob.getCounters()
      .getGroup(TOTALINDEX_COUNTER_GROUP)
      .findCounter(TOTALINDEX_COUNTER_NAME)
      .getValue();
  // Plain narrowing cast: counter values outside the int range are truncated, not rejected.
  return (int) indexCount;
}
From source file:com.twitter.algebra.nmf.ReindexerJob.java
License:Apache License
public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t"); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(ReindexerJob.class); job.setJobName(ReindexerJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(KeyValueTextInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setReducerClass(MyReducer.class); // this makes the reindexing very slow but is necessary to have total order job.setNumReduceTasks(1);//from ww w .j av a 2s.com job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(IntWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); return job; }
From source file:com.twitter.algebra.nmf.RowSquareSumJob.java
License:Apache License
/** * Returns the path to the vector that contains the sum of the rows of A * @param conf//from ww w . j av a2 s .c o m * @param A * @param label * @return * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public static Path run(Configuration conf, DistributedRowMatrix A, String label) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); log.info("running " + RowSquareSumJob.class.getName()); Path outPath = new Path(A.getOutputTempPath(), label); FileSystem fs = FileSystem.get(outPath.toUri(), conf); RowSquareSumJob job = new RowSquareSumJob(); if (!fs.exists(outPath)) { job.run(conf, A.getRowPath(), outPath, A.numRows()); } else { log.warn("----------- Skip already exists: " + outPath); } // Matrix centRes = AlgebraCommon.mapDirToSparseMatrix(outPath, A.numRows(), // A.numCols(), conf); // Vector resVec = centRes.viewRow(0); // System.out.println("Sum of the rows of " + A.getRowPath()); // System.out.println(resVec); return outPath; }
From source file:com.twitter.algebra.nmf.RowSquareSumJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows) throws IOException, InterruptedException, ClassNotFoundException { @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(RowSquareSumJob.class); job.setJobName(RowSquareSumJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); int numReducers = 1; job.setNumReduceTasks(numReducers);/*from w w w.j a v a 2 s . c o m*/ job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(SumMapper.class); job.setCombinerClass(MergeVectorsReducer.class); job.setReducerClass(MergeVectorsReducer.class); // RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, // aRows); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.nmf.SampleColsJob.java
License:Apache License
/**
 * Runs {@link SampleColsJob} on {@code A} unless the output already exists, and wraps the
 * output directory in a {@link DistributedRowMatrix}.
 *
 * @param conf       configuration used for the job and attached to the returned matrix
 * @param A          matrix to sample from
 * @param sampleRate sampling rate forwarded to the job
 * @param label      name of the output directory beneath {@code A}'s temp path
 * @return a matrix over the output path, declared with the same row and column counts as
 *         {@code A}
 * @throws IOException            if the job reports failure or a file system call fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A,
    float sampleRate, String label)
    throws IOException, InterruptedException, ClassNotFoundException {
  log.info("running " + SampleColsJob.class.getName());

  Path outPath = new Path(A.getOutputTempPath(), label);
  FileSystem fs = FileSystem.get(outPath.toUri(), conf);
  SampleColsJob job = new SampleColsJob();

  // Existing output is reused instead of being recomputed.
  if (fs.exists(outPath)) {
    log.warn("----------- Skip already exists: " + outPath);
  } else {
    job.run(conf, A.getRowPath(), A.numCols(), outPath, sampleRate);
  }

  DistributedRowMatrix sampled =
      new DistributedRowMatrix(outPath, A.getOutputTempPath(), A.numRows(), A.numCols());
  sampled.setConf(conf);
  return sampled;
}
From source file:com.twitter.algebra.nmf.SampleColsJob.java
License:Apache License
/**
 * Configures, submits and waits for the map-only column-sampling job.
 *
 * @param conf             base configuration; a private copy is modified, the argument is not
 * @param matrixInputPath  sequence-file input
 * @param cols             column count, published to the job under {@code COLS}
 * @param matrixOutputPath destination written through {@code MatrixOutputFormat}
 * @param sampleRate       sampling rate, published to the job under {@code SAMPLERATE}
 * @throws IOException            if the job reports failure or a file system call fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath,
    float sampleRate) throws IOException, InterruptedException, ClassNotFoundException {
  // All settings must be in place before the Job is created from the configuration.
  conf = new Configuration(conf);
  conf.setFloat(SAMPLERATE, sampleRate);
  conf.setInt(COLS, cols);

  FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
  NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplecol");

  @SuppressWarnings("deprecation")
  Job job = new Job(conf);
  job.setJarByClass(SampleColsJob.class);
  job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

  // Both paths are qualified against the file system of the input path.
  matrixInputPath = fs.makeQualified(matrixInputPath);
  matrixOutputPath = fs.makeQualified(matrixOutputPath);

  // Input side.
  FileInputFormat.addInputPath(job, matrixInputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);

  // Output side.
  FileOutputFormat.setOutputPath(job, matrixOutputPath);
  job.setOutputFormatClass(MatrixOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(VectorWritable.class);

  // Map-only: zero reduce tasks.
  job.setMapperClass(MyMapper.class);
  job.setNumReduceTasks(0);

  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IOException("Job failed!");
  }
}
From source file:com.twitter.algebra.nmf.SampleRowsJob.java
License:Apache License
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, float sampleRate, String label) throws IOException, InterruptedException, ClassNotFoundException { log.info("running " + SampleRowsJob.class.getName()); Path outPath = new Path(A.getOutputTempPath(), label); FileSystem fs = FileSystem.get(outPath.toUri(), conf); SampleRowsJob job = new SampleRowsJob(); if (!fs.exists(outPath)) { job.run(conf, A.getRowPath(), outPath, sampleRate); } else {// w w w.j a v a 2 s . c o m log.warn("----------- Skip already exists: " + outPath); } DistributedRowMatrix distRes = new DistributedRowMatrix(outPath, A.getOutputTempPath(), A.numRows(), A.numCols()); distRes.setConf(conf); return distRes; }
From source file:com.twitter.algebra.nmf.SampleRowsJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, float sampleRate) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.setFloat(SAMPLERATE, sampleRate); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplerows"); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJarByClass(SampleRowsJob.class); job.setJobName(SampleRowsJob.class.getSimpleName() + "-" + matrixOutputPath.getName()); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setNumReduceTasks(0);//from ww w .j ava2 s . c om job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); boolean res = job.waitForCompletion(true); if (!res) throw new IOException("Job failed!"); }
From source file:com.twitter.algebra.nmf.XtXJob.java
License:Apache License
public DistributedRowMatrix computeXtX(DistributedRowMatrix xMatrix, Vector xm, Path tmpPath, Configuration conf, String id) throws IOException, InterruptedException, ClassNotFoundException { Path outPath = new Path(tmpPath, "XtX-" + id); // Path xmPath = // AlgebraCommon.toDistributedVector(xm, tmpPath, "xm-XtXJob" + id, conf); FileSystem fs = FileSystem.get(outPath.toUri(), conf); if (!fs.exists(outPath)) { run(conf, xMatrix.getRowPath(), xMatrix.numCols(), null, outPath); } else {//from w ww . ja va2 s .co m log.warn("----------- Skip XtXjob - already exists: " + outPath); } DistributedRowMatrix xtx = new DistributedRowMatrix(outPath, tmpPath, xMatrix.numCols(), xMatrix.numCols()); xtx.setConf(conf); return xtx; }
From source file:com.twitter.algebra.nmf.XtXJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, int numCols, String xmPath, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { conf = new Configuration(conf); conf.setInt(MATRIXCOLS, numCols);/*www . j a va 2 s.com*/ // conf.set(XMPATH, xmPath); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); NMFCommon.setNumberOfMapSlots(conf, fs, new Path[] { matrixInputPath }, "xtx"); @SuppressWarnings("deprecation") Job job = new Job(conf); job.setJobName("XtXJob-" + matrixOutputPath.getName()); job.setJarByClass(XtXJob.class); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "xtx"); job.setNumReduceTasks(numReducers); // ensures total order (when used with {@link MatrixOutputFormat}), RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numCols); job.setOutputFormatClass(MatrixOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.submit(); job.waitForCompletion(true); }