List of usage examples for org.apache.hadoop.fs.FileSystem#makeQualified
public Path makeQualified(Path path)
From source file:org.qcri.pca.MeanAndSpanJob.java
/**
 * Computes the column-wise mean and span of a DistributedRowMatrix.
 *
 * <p>Results live in a directory named {@code "meanAndSpan" + id} under
 * {@code outputPath}. When that directory already exists the job is not re-run;
 * a warning is logged and the existing results are loaded instead.
 *
 * @param inputPath     path of the input matrix; qualified and handed to the job
 * @param outputPath    parent directory under which the result directory is created
 * @param resMean       vector that receives the loaded mean vector via {@code assign}
 * @param normalizeMean forwarded unchanged to {@code loadResults}
 * @param conf          the Hadoop configuration
 * @param id            suffix appended to the result directory name
 * @return the path obtained from {@code getMeanSpanPath} for the result directory
 * @throws IOException if a file system operation or the job fails with an I/O error
 */
public Path compuateMeanAndSpan(Path inputPath, Path outputPath, DenseVector resMean, boolean normalizeMean,
        Configuration conf, String id) throws IOException {
    final Path unqualifiedDir = new Path(outputPath, "meanAndSpan" + id);
    final FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
    final Path meanSpanDirPath = fs.makeQualified(unqualifiedDir);

    if (fs.exists(meanSpanDirPath)) {
        // Results from an earlier run are reused as-is.
        log.warn("--------- Skip MeanAndSpanJob - already exists" + meanSpanDirPath);
    } else {
        run(conf, fs.makeQualified(inputPath), meanSpanDirPath);
    }

    final Path meanSpanPath = getMeanSpanPath(meanSpanDirPath);
    loadResults(meanSpanPath, normalizeMean, conf);
    resMean.assign(getMeanVector());
    return meanSpanPath;
}
From source file:org.qcri.pca.Norm2Job.java
public void run(Configuration conf, Path matrixInputPath, String meanSpanFileName, Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException { conf.set(MEANSPANOPTION, meanSpanFileName); Job job = new Job(conf); job.setJobName("Norm2Job"); job.setJarByClass(Norm2Job.class); FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf); matrixInputPath = fs.makeQualified(matrixInputPath); matrixOutputPath = fs.makeQualified(matrixOutputPath); FileInputFormat.addInputPath(job, matrixInputPath); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, matrixOutputPath); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setNumReduceTasks(1);//from w w w . j a va2 s.com job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(DoubleWritable.class); job.submit(); job.waitForCompletion(true); }
From source file:org.qcri.pca.NormalizeJob.java
/**
 * Configures and runs the map-only "Normalize" job and waits for it to finish.
 *
 * <p>The job reads sequence files from {@code matrixInputPath}, applies
 * {@code NormalizeMapper} with zero reduce tasks, and writes
 * {@code IntWritable}/{@code VectorWritable} pairs as sequence files to
 * {@code matrixOutputPath}. Both paths are qualified against the file system of
 * the input path. Note that {@code conf} is modified: {@code meanSpanFileName}
 * is stored under {@code MEANSPANOPTION} and {@code sampleRate} (narrowed to
 * {@code float}) under {@code SAMPLERATE}.
 *
 * @param conf             the Hadoop configuration (modified, see above)
 * @param matrixInputPath  path of the input matrix
 * @param meanSpanFileName value stored under {@code MEANSPANOPTION}
 * @param matrixOutputPath path the job output is written to
 * @param sampleRate       value stored under {@code SAMPLERATE} as a float
 * @throws IOException            on I/O errors, or if the job completes unsuccessfully
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public void run(Configuration conf, Path matrixInputPath, String meanSpanFileName, Path matrixOutputPath,
        double sampleRate) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MEANSPANOPTION, meanSpanFileName);
    conf.setFloat(SAMPLERATE, (float) sampleRate);

    Job job = new Job(conf);
    job.setJobName("Normalize");
    job.setJarByClass(NormalizeJob.class);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    job.setMapperClass(NormalizeMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // waitForCompletion submits the job itself, so no separate submit() is needed.
    // A failed job must not go unnoticed: later steps read its output.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Normalize job failed for input " + matrixInputPath);
    }
}
From source file:org.qcri.pca.ReconstructionErrJob.java
/** * Refer to {@link ReconstructionErrJob} for explanation of the job * /*from w w w . j ava 2s . c o m*/ * @param conf * the configuration * @param yPath * the path to input matrix Y * @param y2xPath * the path to in-memory matrix Y2X, where X = Y * Y2X * @param yCols * the number of columns in Y * @param xCols * the number of columns in X * @param cPath * the path to in-memory matrix C, where ReconY = Xc * C' * @param zmPath * the path to vector Zm, where Zm = Ym * Y2X * C' - Ym * @param ymPath * the path the the mean vector Ym * @param outPath * the output path * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void run(Configuration conf, Path yPath, Path y2xPath, int yCols, int xCols, Path cPath, String zmPath, String ymPath, Path outPath, final float ERR_SAMPLE_RATE) throws IOException, InterruptedException, ClassNotFoundException { conf.set(MATRIXY2X, y2xPath.toString()); conf.set(RECONSTRUCTIONMATRIX, cPath.toString()); conf.set(ZMPATH, zmPath); conf.set(YMPATH, ymPath); conf.setInt(YCOLS, yCols); conf.setInt(XCOLS, xCols); conf.set(ERRSAMPLERATE, "" + ERR_SAMPLE_RATE); FileSystem fs = FileSystem.get(yPath.toUri(), conf); yPath = fs.makeQualified(yPath); outPath = fs.makeQualified(outPath); Job job = new Job(conf); FileInputFormat.addInputPath(job, yPath); FileOutputFormat.setOutputPath(job, outPath); job.setJobName("ReconErrJob-" + yPath.getName()); job.setJarByClass(ReconstructionErrJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setNumReduceTasks(1); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setNumReduceTasks(1); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(DoubleWritable.class); job.submit(); job.waitForCompletion(true); }
From source file:org.qcri.pca.VarianceJob.java
/**
 * Configures and runs the "VarianceJob-&lt;input name&gt;" MapReduce job and waits
 * for it to finish.
 *
 * <p>The job reads sequence files from {@code yPath}, uses {@code MyMapper} and
 * {@code MyReducer} with a single reduce task, and writes
 * {@code NullWritable}/{@code DoubleWritable} pairs as sequence files to
 * {@code outPath}. Both paths are qualified against the file system of
 * {@code yPath}. Note that {@code conf} is modified: the four string arguments
 * are stored in it under {@code MATRIXY2X}, {@code MATRIXC}, {@code XMPATH} and
 * {@code YMPATH}.
 *
 * @param conf         the Hadoop configuration (modified, see above)
 * @param yPath        path of the job input
 * @param ymPath       value stored under {@code YMPATH}
 * @param matrixY2XDir value stored under {@code MATRIXY2X}
 * @param xmPath       value stored under {@code XMPATH}
 * @param matrixCDir   value stored under {@code MATRIXC}
 * @param outPath      path the job output is written to
 * @throws IOException            on I/O errors, or if the job completes unsuccessfully
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public void run(Configuration conf, Path yPath, String ymPath, String matrixY2XDir, String xmPath,
        String matrixCDir, Path outPath) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXY2X, matrixY2XDir);
    conf.set(MATRIXC, matrixCDir);
    conf.set(XMPATH, xmPath);
    conf.set(YMPATH, ymPath);

    FileSystem fs = FileSystem.get(yPath.toUri(), conf);
    yPath = fs.makeQualified(yPath);
    outPath = fs.makeQualified(outPath);

    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, yPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setJobName("VarianceJob-" + yPath.getName());
    job.setJarByClass(VarianceJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    // waitForCompletion submits the job itself, so no separate submit() is needed.
    // A failed job must not go unnoticed: later steps read its output.
    if (!job.waitForCompletion(true)) {
        throw new IOException("VarianceJob failed for input " + yPath);
    }
}
From source file:org.seqdoop.hadoop_bam.TestVCFRoundTrip.java
License:Open Source License
private Path doMapReduce(final Path inputPath, final boolean writeHeader) throws Exception { final FileSystem fileSystem = FileSystem.get(conf); final Path outputPath = fileSystem.makeQualified(new Path("target/out")); fileSystem.delete(outputPath, true); final Job job = Job.getInstance(conf); FileInputFormat.setInputPaths(job, inputPath); job.setInputFormatClass(VCFInputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(VariantContextWritable.class); job.setOutputFormatClass(//from www. ja v a 2s.c o m writeHeader ? VCFTestWithHeaderOutputFormat.class : VCFTestNoHeaderOutputFormat.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(VariantContextWritable.class); job.setNumReduceTasks(0); FileOutputFormat.setOutputPath(job, outputPath); if (codecClass != null) { FileOutputFormat.setOutputCompressorClass(job, codecClass); } final boolean success = job.waitForCompletion(true); assertTrue(success); return outputPath; }
From source file:org.swjtu.helloworldcn.APCParallelUpdateRAJob.java
License:Apache License
/**
 * Runs the two chained jobs "APCParallelUpdateRAJob" and
 * "APCParallelUpdateRATransJob".
 *
 * <p>The first job reads {@code inputPath} and writes to
 * {@code <outputPath>/tmp}; the second job reads that directory and writes to
 * {@code <outputPath>/result}. Both paths are qualified against the default file
 * system of a fresh {@link Configuration}. The second job is only started when
 * the first one succeeded.
 *
 * @param inputPath  input of the first job
 * @param outputPath parent directory for the {@code tmp} and {@code result} outputs
 * @param colnums    stored (as a string) in the configuration under {@code COL_NUMS}
 * @param lamda      stored (as a string) in the configuration under {@code LAMDA}
 * @return the qualified {@code <outputPath>/result} path
 * @throws IOException            on I/O errors, or if either job completes unsuccessfully
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if waiting for a job is interrupted
 */
public static Path runJob(Path inputPath, Path outputPath, int colnums, double lamda)
        throws IOException, ClassNotFoundException, InterruptedException {
    // set up the parameters of runtime for jobs
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    inputPath = fs.makeQualified(inputPath);
    conf.set(COL_NUMS, colnums + "");
    conf.set(LAMDA, String.valueOf(lamda));
    // Disabled tuning settings, kept for reference:
    // conf.setInt("io.sort.mb", 400);
    // conf.setInt("io.sort.factor", 100);
    // conf.setInt("mapred.reduce.parallel.copies", 20);
    // conf.setFloat("mapred.job.reduce.input.buffer.percent", 0.5f);
    // conf.setBoolean("mapred.output.compress", true);
    // conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    outputPath = fs.makeQualified(outputPath);

    // set up the APCParallelUpdateRAJob job
    Job jobUpdate = new Job(conf, "APCParallelUpdateRAJob");
    jobUpdate.setInputFormatClass(SequenceFileInputFormat.class);
    jobUpdate.setOutputKeyClass(IntWritable.class);
    jobUpdate.setOutputValueClass(APCMatrixEntryWritable.class);
    jobUpdate.setOutputFormatClass(SequenceFileOutputFormat.class);
    jobUpdate.setMapperClass(APCParallelUpdateRAMapper.class);
    jobUpdate.setReducerClass(APCParallelUpdateRAReducer.class);

    Path outputUpdatePath = new Path(outputPath, "tmp");
    FileInputFormat.addInputPath(jobUpdate, inputPath);
    FileOutputFormat.setOutputPath(jobUpdate, outputUpdatePath);
    jobUpdate.setJarByClass(APCParallelUpdateRAJob.class);

    // The transpose job consumes this job's output, so stop here on failure.
    if (!jobUpdate.waitForCompletion(true)) {
        throw new IOException("APCParallelUpdateRAJob failed for input " + inputPath);
    }

    // set up the translate matrix job
    Job jobTrans = new Job(conf, "APCParallelUpdateRATransJob");
    jobTrans.setInputFormatClass(SequenceFileInputFormat.class);
    jobTrans.setOutputFormatClass(SequenceFileOutputFormat.class);
    jobTrans.setMapperClass(APCParallelUpdateRATransposeMapper.class);
    jobTrans.setReducerClass(APCParallelUpdateRATransposeReducer.class);

    jobTrans.setMapOutputKeyClass(IntWritable.class);
    jobTrans.setMapOutputValueClass(APCMatrixEntryWritable.class);
    jobTrans.setOutputKeyClass(IntWritable.class);
    jobTrans.setOutputValueClass(APCRowVectorWritable.class);

    FileInputFormat.addInputPath(jobTrans, outputUpdatePath);
    outputPath = new Path(outputPath, "result");
    FileOutputFormat.setOutputPath(jobTrans, outputPath);
    jobTrans.setJarByClass(APCParallelUpdateRAJob.class);

    if (!jobTrans.waitForCompletion(true)) {
        throw new IOException("APCParallelUpdateRATransJob failed for input " + outputUpdatePath);
    }

    return outputPath;
}
From source file:org.swjtu.helloworldcn.ComputeAvailabilitiesJob.java
License:Apache License
public static DistributedRowMatrix runJob(DistributedRowMatrix A, DistributedRowMatrix R, DistributedRowMatrix st, Path outputPath, Path tmpPath) throws IOException, ClassNotFoundException, InterruptedException { // set up the serialization of the diagonal vector Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path APath = fs.makeQualified(A.getRowPath()); conf.set(ST_PATH, st.getRowPath().toUri().toString()); conf.set(R_PATH, A.getRowPath().toUri().toString()); conf.set(TMP_ST_PATH, st.getOutputTempPath().toUri().toString()); conf.set(TMP_R_PATH, A.getOutputTempPath().toUri().toString()); conf.set(COL_NUMS, A.numRows() + ""); outputPath = fs.makeQualified(outputPath); /*/* w w w .ja va 2s. c o m*/ * VectorCache.save(new IntWritable(EigencutsKeys.DIAGONAL_CACHE_INDEX), * diag, vectorOutputPath, conf); */ // set up the job itself Job job = new Job(conf, "ComputeAvailabilities"); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(ComputeAvailabilitiesMapper.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, APath); FileOutputFormat.setOutputPath(job, outputPath); job.setJarByClass(ComputeAvailabilitiesJob.class); job.waitForCompletion(true); // build the resulting DRM from the results return new DistributedRowMatrix(outputPath, tmpPath, A.numRows(), A.numCols()); }
From source file:org.swjtu.helloworldcn.ComputeResponsibilitiesJob.java
License:Apache License
public static DistributedRowMatrix runJob(DistributedRowMatrix A, DistributedRowMatrix R, DistributedRowMatrix st, Path outputPath, Path tmpPath) throws IOException, ClassNotFoundException, InterruptedException { // set up the serialization of the diagonal vector Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path RPath = fs.makeQualified(R.getRowPath()); conf.set(ST_PATH, st.getRowPath().toUri().toString()); conf.set(A_PATH, A.getRowPath().toUri().toString()); conf.set(TMP_ST_PATH, st.getOutputTempPath().toUri().toString()); conf.set(TMP_A_PATH, A.getOutputTempPath().toUri().toString()); conf.set(COL_NUMS, A.numRows() + ""); outputPath = fs.makeQualified(outputPath); /*// w ww .j a v a 2 s .co m * VectorCache.save(new IntWritable(EigencutsKeys.DIAGONAL_CACHE_INDEX), * diag, vectorOutputPath, conf); */ // set up the job itself Job job = new Job(conf, "ComputeResponsibilities"); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(ComputeResponsibilitiesMapper.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, RPath); FileOutputFormat.setOutputPath(job, outputPath); job.setJarByClass(ComputeResponsibilitiesJob.class); job.waitForCompletion(true); // build the resulting DRM from the results return new DistributedRowMatrix(outputPath, tmpPath, R.numRows(), R.numCols()); }
From source file:pl.edu.icm.coansys.heeut.TestHBaseMapReduce.java
License:Apache License
@Test(timeout = 1800000) public void testExportImport() throws Exception { String tableInitName = getCurrentDateAppended("testExportImport"); createAndPopulateDefaultTable(tableInitName, TEST_ROW_COUNT); FileSystem dfs = UTIL.getDFSCluster().getFileSystem(); Path qualifiedTempDir = dfs.makeQualified(new Path("export-import-temp-dir")); Assert.assertFalse(dfs.exists(qualifiedTempDir)); Job jobExport = Export.createSubmittableJob(UTIL.getConfiguration(), new String[] { tableInitName, qualifiedTempDir.toString() }); jobExport.waitForCompletion(true);// www . j a v a 2 s . c o m Assert.assertTrue(dfs.exists(qualifiedTempDir)); final String tableImportName = tableInitName + "Import"; HTable htableImport = UTIL.createTable(Bytes.toBytes(tableImportName), B_COLUMN_FAMILY); Job jobImport = Import.createSubmittableJob(UTIL.getConfiguration(), new String[] { tableImportName, qualifiedTempDir.toString() }); jobImport.waitForCompletion(true); Assert.assertEquals(TEST_ROW_COUNT, (long) UTIL.countRows(htableImport)); dropTable(tableInitName); dropTable(tableImportName); }