Example usage for org.apache.hadoop.fs FileSystem makeQualified

List of usage examples for org.apache.hadoop.fs FileSystem makeQualified

Introduction

This page collects usage examples for the method org.apache.hadoop.fs.FileSystem.makeQualified.

Prototype

public Path makeQualified(Path path) 

Source Link

Document

Qualifies a path so that it uses this FileSystem and, if the path is relative, makes it absolute.

Usage

From source file:org.qcri.pca.MeanAndSpanJob.java

/**
 * Computes the column-wise mean and span of a DistributedRowMatrix.
 *
 * <p>The results are kept in a directory named {@code "meanAndSpan" + id}
 * below {@code outputPath}. When that directory already exists the job is
 * skipped (a warning is logged) and the existing results are loaded.
 *
 * @param inputPath path of the input matrix; its URI also selects the FileSystem
 * @param outputPath parent of the mean/span result directory
 * @param resMean vector that is assigned the value of {@code getMeanVector()}
 * @param normalizeMean passed through to {@code loadResults}
 * @param conf the Hadoop configuration
 * @param id suffix appended to the result directory name
 * @return the path produced by {@code getMeanSpanPath} for the result directory
 * @throws IOException
 */
public Path compuateMeanAndSpan(Path inputPath, Path outputPath, DenseVector resMean, boolean normalizeMean,
        Configuration conf, String id) throws IOException {
    final String dirName = "meanAndSpan" + id;
    Path unqualifiedDir = new Path(outputPath, dirName);
    FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
    final Path resultDir = fs.makeQualified(unqualifiedDir);

    if (fs.exists(resultDir)) {
        // Results of an earlier run are reused as-is.
        log.warn("--------- Skip MeanAndSpanJob - already exists" + resultDir);
    } else {
        run(conf, fs.makeQualified(inputPath), resultDir);
    }

    final Path meanSpanPath = getMeanSpanPath(resultDir);
    loadResults(meanSpanPath, normalizeMean, conf);
    resMean.assign(getMeanVector());
    return meanSpanPath;
}

From source file:org.qcri.pca.Norm2Job.java

/**
 * Configures the "Norm2Job" map-reduce job, submits it and waits for it to finish.
 *
 * @param conf configuration the job is created from; MEANSPANOPTION is set on it
 * @param matrixInputPath sequence-file input; its URI selects the FileSystem
 * @param meanSpanFileName value stored under MEANSPANOPTION
 * @param matrixOutputPath job output directory, qualified against the input FileSystem
 */
public void run(Configuration conf, Path matrixInputPath, String meanSpanFileName, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    // The option is written before the Job is constructed from conf.
    conf.set(MEANSPANOPTION, meanSpanFileName);
    Job norm2Job = new Job(conf);
    norm2Job.setJobName("Norm2Job");
    norm2Job.setJarByClass(Norm2Job.class);

    // Input and output are both qualified against the input matrix's file system.
    FileSystem inputFs = FileSystem.get(matrixInputPath.toUri(), conf);
    Path qualifiedInput = inputFs.makeQualified(matrixInputPath);
    Path qualifiedOutput = inputFs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(norm2Job, qualifiedInput);
    FileOutputFormat.setOutputPath(norm2Job, qualifiedOutput);

    norm2Job.setInputFormatClass(SequenceFileInputFormat.class);
    norm2Job.setOutputFormatClass(SequenceFileOutputFormat.class);

    norm2Job.setMapperClass(MyMapper.class);
    norm2Job.setReducerClass(MyReducer.class);
    norm2Job.setNumReduceTasks(1);

    norm2Job.setOutputKeyClass(NullWritable.class);
    norm2Job.setOutputValueClass(DoubleWritable.class);

    norm2Job.submit();
    norm2Job.waitForCompletion(true);
}

From source file:org.qcri.pca.NormalizeJob.java

/**
 * Configures the map-only "Normalize" job, submits it and waits for it to finish.
 *
 * @param conf configuration the job is created from; MEANSPANOPTION and SAMPLERATE are set on it
 * @param matrixInputPath sequence-file input; its URI selects the FileSystem
 * @param meanSpanFileName value stored under MEANSPANOPTION
 * @param matrixOutputPath job output directory, qualified against the input FileSystem
 * @param sampleRate stored under SAMPLERATE after narrowing to {@code float}
 */
public void run(Configuration conf, Path matrixInputPath, String meanSpanFileName, Path matrixOutputPath,
        double sampleRate) throws IOException, InterruptedException, ClassNotFoundException {
    // Job options are written before the Job is constructed from conf.
    conf.set(MEANSPANOPTION, meanSpanFileName);
    final float narrowedRate = (float) sampleRate;
    conf.setFloat(SAMPLERATE, narrowedRate);

    Job normalizeJob = new Job(conf);
    normalizeJob.setJobName("Normalize");
    normalizeJob.setJarByClass(NormalizeJob.class);

    // Input and output are both qualified against the input matrix's file system.
    FileSystem inputFs = FileSystem.get(matrixInputPath.toUri(), conf);
    Path qualifiedInput = inputFs.makeQualified(matrixInputPath);
    Path qualifiedOutput = inputFs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(normalizeJob, qualifiedInput);
    FileOutputFormat.setOutputPath(normalizeJob, qualifiedOutput);

    normalizeJob.setInputFormatClass(SequenceFileInputFormat.class);
    normalizeJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Zero reduce tasks: the job consists of the mapper only.
    normalizeJob.setMapperClass(NormalizeMapper.class);
    normalizeJob.setNumReduceTasks(0);

    normalizeJob.setOutputKeyClass(IntWritable.class);
    normalizeJob.setOutputValueClass(VectorWritable.class);

    normalizeJob.submit();
    normalizeJob.waitForCompletion(true);
}

From source file:org.qcri.pca.ReconstructionErrJob.java

/**
 * Refer to {@link ReconstructionErrJob} for explanation of the job
 * /*from w w w  . j ava 2s  . c o m*/
 * @param conf
 *          the configuration
 * @param yPath
 *          the path to input matrix Y
 * @param y2xPath
 *          the path to in-memory matrix Y2X, where X = Y * Y2X
 * @param yCols
 *          the number of columns in Y
 * @param xCols
 *          the number of columns in X
 * @param cPath
 *          the path to in-memory matrix C, where ReconY = Xc * C'
 * @param zmPath
 *          the path to vector Zm, where Zm = Ym * Y2X * C' - Ym
 * @param ymPath
 *          the path the the mean vector Ym
 * @param outPath
 *          the output path
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path yPath, Path y2xPath, int yCols, int xCols, Path cPath, String zmPath,
        String ymPath, Path outPath, final float ERR_SAMPLE_RATE)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXY2X, y2xPath.toString());
    conf.set(RECONSTRUCTIONMATRIX, cPath.toString());
    conf.set(ZMPATH, zmPath);
    conf.set(YMPATH, ymPath);
    conf.setInt(YCOLS, yCols);
    conf.setInt(XCOLS, xCols);
    conf.set(ERRSAMPLERATE, "" + ERR_SAMPLE_RATE);
    FileSystem fs = FileSystem.get(yPath.toUri(), conf);
    yPath = fs.makeQualified(yPath);
    outPath = fs.makeQualified(outPath);
    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, yPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setJobName("ReconErrJob-" + yPath.getName());
    job.setJarByClass(ReconstructionErrJob.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.submit();
    job.waitForCompletion(true);
}

From source file:org.qcri.pca.VarianceJob.java

/**
 * Configures a job named {@code "VarianceJob-" + <last component of yPath>}
 * with a single reduce task, submits it and waits for it to finish.
 *
 * @param conf configuration the job is created from; the four path options below are set on it
 * @param yPath sequence-file input; its URI selects the FileSystem
 * @param ymPath value stored under YMPATH
 * @param matrixY2XDir value stored under MATRIXY2X
 * @param xmPath value stored under XMPATH
 * @param matrixCDir value stored under MATRIXC
 * @param outPath job output directory, qualified against the input FileSystem
 */
public void run(Configuration conf, Path yPath, String ymPath, String matrixY2XDir, String xmPath,
        String matrixCDir, Path outPath) throws IOException, InterruptedException, ClassNotFoundException {
    // Job options are written before the Job is constructed from conf.
    conf.set(MATRIXY2X, matrixY2XDir);
    conf.set(MATRIXC, matrixCDir);
    conf.set(XMPATH, xmPath);
    conf.set(YMPATH, ymPath);

    FileSystem inputFs = FileSystem.get(yPath.toUri(), conf);
    final Path qualifiedY = inputFs.makeQualified(yPath);
    final Path qualifiedOut = inputFs.makeQualified(outPath);

    Job varianceJob = new Job(conf);
    varianceJob.setJobName("VarianceJob-" + qualifiedY.getName());
    varianceJob.setJarByClass(VarianceJob.class);

    FileInputFormat.addInputPath(varianceJob, qualifiedY);
    FileOutputFormat.setOutputPath(varianceJob, qualifiedOut);
    varianceJob.setInputFormatClass(SequenceFileInputFormat.class);
    varianceJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    varianceJob.setMapperClass(MyMapper.class);
    varianceJob.setReducerClass(MyReducer.class);
    varianceJob.setNumReduceTasks(1);

    // Map output and final output use the same key/value types.
    varianceJob.setMapOutputKeyClass(NullWritable.class);
    varianceJob.setMapOutputValueClass(DoubleWritable.class);
    varianceJob.setOutputKeyClass(NullWritable.class);
    varianceJob.setOutputValueClass(DoubleWritable.class);

    varianceJob.submit();
    varianceJob.waitForCompletion(true);
}

From source file:org.seqdoop.hadoop_bam.TestVCFRoundTrip.java

License:Open Source License

/**
 * Runs a map-only job that reads {@code inputPath} through VCFInputFormat and
 * writes to the qualified {@code target/out} directory, which is deleted first.
 * Asserts that the job completed successfully.
 *
 * @param inputPath the job input
 * @param writeHeader selects VCFTestWithHeaderOutputFormat when true,
 *          VCFTestNoHeaderOutputFormat otherwise
 * @return the qualified output directory
 */
private Path doMapReduce(final Path inputPath, final boolean writeHeader) throws Exception {
    // Remove any output left behind by a previous run.
    final FileSystem fs = FileSystem.get(conf);
    final Path outDir = fs.makeQualified(new Path("target/out"));
    fs.delete(outDir, true);

    final Job roundTripJob = Job.getInstance(conf);

    // Input side.
    FileInputFormat.setInputPaths(roundTripJob, inputPath);
    roundTripJob.setInputFormatClass(VCFInputFormat.class);

    // Map output and final output use the same key/value types.
    roundTripJob.setMapOutputKeyClass(LongWritable.class);
    roundTripJob.setMapOutputValueClass(VariantContextWritable.class);
    roundTripJob.setOutputKeyClass(LongWritable.class);
    roundTripJob.setOutputValueClass(VariantContextWritable.class);

    // Output side.
    roundTripJob.setOutputFormatClass(
            writeHeader ? VCFTestWithHeaderOutputFormat.class : VCFTestNoHeaderOutputFormat.class);
    roundTripJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(roundTripJob, outDir);
    if (codecClass != null) {
        FileOutputFormat.setOutputCompressorClass(roundTripJob, codecClass);
    }

    assertTrue(roundTripJob.waitForCompletion(true));
    return outDir;
}

From source file:org.swjtu.helloworldcn.APCParallelUpdateRAJob.java

License:Apache License

public static Path runJob(Path inputPath, Path outputPath, int colnums, double lamda)
        throws IOException, ClassNotFoundException, InterruptedException {

    // set up the parameters of runtime for jobs
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    inputPath = fs.makeQualified(inputPath);

    conf.set(COL_NUMS, colnums + "");
    conf.set(LAMDA, String.valueOf(lamda));
    /*      conf.setInt("io.sort.mb", 400);
          conf.setInt("io.sort.factor", 100);
            /*from   w  ww.ja  v a2s.com*/
          //conf.setInt("mapred.reduce.parallel.copies", 20);
          //conf.setFloat("mapred.job.reduce.input.buffer.percent", 0.5f);
          conf.setBoolean("mapred.output.compress",true);
          conf.setClass("mapred.output.compression.codec",GzipCodec.class, CompressionCodec.class);*/

    outputPath = fs.makeQualified(outputPath);

    // set up the APCParallelUpdateRAJob job 
    Job jobUpdate = new Job(conf, "APCParallelUpdateRAJob");

    jobUpdate.setInputFormatClass(SequenceFileInputFormat.class);
    jobUpdate.setOutputKeyClass(IntWritable.class);
    jobUpdate.setOutputValueClass(APCMatrixEntryWritable.class);
    jobUpdate.setOutputFormatClass(SequenceFileOutputFormat.class);
    jobUpdate.setMapperClass(APCParallelUpdateRAMapper.class);
    jobUpdate.setReducerClass(APCParallelUpdateRAReducer.class);
    Path outputUpdatePath = new Path(outputPath, "tmp");
    FileInputFormat.addInputPath(jobUpdate, inputPath);
    FileOutputFormat.setOutputPath(jobUpdate, outputUpdatePath);

    jobUpdate.setJarByClass(APCParallelUpdateRAJob.class);

    jobUpdate.waitForCompletion(true);

    // set up the translate matrix job 
    Job jobTrans = new Job(conf, "APCParallelUpdateRATransJob");

    jobTrans.setInputFormatClass(SequenceFileInputFormat.class);
    //jobTrans.setOutputKeyClass(IntWritable.class);
    //jobTrans.setOutputValueClass(APCRowVectorWritable.class);
    jobTrans.setOutputFormatClass(SequenceFileOutputFormat.class);
    jobTrans.setMapperClass(APCParallelUpdateRATransposeMapper.class);
    jobTrans.setReducerClass(APCParallelUpdateRATransposeReducer.class);
    jobTrans.setMapOutputKeyClass(IntWritable.class);
    jobTrans.setMapOutputValueClass(APCMatrixEntryWritable.class);
    jobTrans.setOutputKeyClass(IntWritable.class);
    jobTrans.setOutputValueClass(APCRowVectorWritable.class);

    FileInputFormat.addInputPath(jobTrans, outputUpdatePath);
    outputPath = new Path(outputPath, "result");
    FileOutputFormat.setOutputPath(jobTrans, outputPath);

    jobTrans.setJarByClass(APCParallelUpdateRAJob.class);

    jobTrans.waitForCompletion(true);

    return outputPath;

}

From source file:org.swjtu.helloworldcn.ComputeAvailabilitiesJob.java

License:Apache License

public static DistributedRowMatrix runJob(DistributedRowMatrix A, DistributedRowMatrix R,
        DistributedRowMatrix st, Path outputPath, Path tmpPath)
        throws IOException, ClassNotFoundException, InterruptedException {

    // set up the serialization of the diagonal vector
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path APath = fs.makeQualified(A.getRowPath());

    conf.set(ST_PATH, st.getRowPath().toUri().toString());
    conf.set(R_PATH, A.getRowPath().toUri().toString());
    conf.set(TMP_ST_PATH, st.getOutputTempPath().toUri().toString());
    conf.set(TMP_R_PATH, A.getOutputTempPath().toUri().toString());
    conf.set(COL_NUMS, A.numRows() + "");

    outputPath = fs.makeQualified(outputPath);

    /*/* w  w w .ja va  2s. c  o  m*/
     * VectorCache.save(new IntWritable(EigencutsKeys.DIAGONAL_CACHE_INDEX),
     * diag, vectorOutputPath, conf);
     */

    // set up the job itself
    Job job = new Job(conf, "ComputeAvailabilities");
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(ComputeAvailabilitiesMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, APath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setJarByClass(ComputeAvailabilitiesJob.class);

    job.waitForCompletion(true);

    // build the resulting DRM from the results
    return new DistributedRowMatrix(outputPath, tmpPath, A.numRows(), A.numCols());
}

From source file:org.swjtu.helloworldcn.ComputeResponsibilitiesJob.java

License:Apache License

/**
 * Runs the map-only "ComputeResponsibilities" job over the rows of {@code R} and
 * wraps the job output in a new DistributedRowMatrix.
 *
 * @param A matrix whose row path and temp path are published under A_PATH / TMP_A_PATH;
 *          its row count is published under COL_NUMS
 * @param R matrix whose row path is the job input; its dimensions size the result
 * @param st matrix whose row path and temp path are published under ST_PATH / TMP_ST_PATH
 * @param outputPath job output directory; qualified before use
 * @param tmpPath temp path handed to the returned matrix
 * @return a DistributedRowMatrix over the job output with the dimensions of {@code R}
 */
public static DistributedRowMatrix runJob(DistributedRowMatrix A, DistributedRowMatrix R,
        DistributedRowMatrix st, Path outputPath, Path tmpPath)
        throws IOException, ClassNotFoundException, InterruptedException {

    final Configuration jobConf = new Configuration();
    final FileSystem defaultFs = FileSystem.get(jobConf);
    final Path responsibilityRows = defaultFs.makeQualified(R.getRowPath());

    // Side inputs are handed to the mapper through the configuration.
    jobConf.set(ST_PATH, st.getRowPath().toUri().toString());
    jobConf.set(A_PATH, A.getRowPath().toUri().toString());
    jobConf.set(TMP_ST_PATH, st.getOutputTempPath().toUri().toString());
    jobConf.set(TMP_A_PATH, A.getOutputTempPath().toUri().toString());
    jobConf.set(COL_NUMS, String.valueOf(A.numRows()));

    final Path qualifiedOutput = defaultFs.makeQualified(outputPath);

    // Map-only job: sequence files in, sequence files of (IntWritable, VectorWritable) out.
    final Job responsibilitiesJob = new Job(jobConf, "ComputeResponsibilities");
    responsibilitiesJob.setJarByClass(ComputeResponsibilitiesJob.class);
    responsibilitiesJob.setMapperClass(ComputeResponsibilitiesMapper.class);
    responsibilitiesJob.setNumReduceTasks(0);

    responsibilitiesJob.setInputFormatClass(SequenceFileInputFormat.class);
    responsibilitiesJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    responsibilitiesJob.setOutputKeyClass(IntWritable.class);
    responsibilitiesJob.setOutputValueClass(VectorWritable.class);

    FileInputFormat.addInputPath(responsibilitiesJob, responsibilityRows);
    FileOutputFormat.setOutputPath(responsibilitiesJob, qualifiedOutput);

    responsibilitiesJob.waitForCompletion(true);

    // Expose the job output as a matrix with R's dimensions.
    return new DistributedRowMatrix(qualifiedOutput, tmpPath, R.numRows(), R.numCols());
}

From source file:pl.edu.icm.coansys.heeut.TestHBaseMapReduce.java

License:Apache License

/**
 * Exports a freshly populated table to a temporary directory, imports that
 * directory into a second table and checks that the imported table holds
 * TEST_ROW_COUNT rows. Both tables are dropped at the end.
 */
@Test(timeout = 1800000)
public void testExportImport() throws Exception {

    // Source table.
    final String sourceTable = getCurrentDateAppended("testExportImport");
    createAndPopulateDefaultTable(sourceTable, TEST_ROW_COUNT);

    // The exchange directory must not exist before the export runs.
    final FileSystem clusterFs = UTIL.getDFSCluster().getFileSystem();
    final Path exchangeDir = clusterFs.makeQualified(new Path("export-import-temp-dir"));
    Assert.assertFalse(clusterFs.exists(exchangeDir));

    // Export: table -> directory.
    final String[] exportArgs = new String[] { sourceTable, exchangeDir.toString() };
    final Job exportJob = Export.createSubmittableJob(UTIL.getConfiguration(), exportArgs);
    exportJob.waitForCompletion(true);

    Assert.assertTrue(clusterFs.exists(exchangeDir));

    // Import: directory -> second table.
    final String targetTable = sourceTable + "Import";
    final HTable importedTable = UTIL.createTable(Bytes.toBytes(targetTable), B_COLUMN_FAMILY);

    final String[] importArgs = new String[] { targetTable, exchangeDir.toString() };
    final Job importJob = Import.createSubmittableJob(UTIL.getConfiguration(), importArgs);
    importJob.waitForCompletion(true);
    Assert.assertEquals(TEST_ROW_COUNT, (long) UTIL.countRows(importedTable));

    dropTable(sourceTable);
    dropTable(targetTable);
}