Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.Path.toUri() method.

Prototype

public URI toUri() 

Source Link

Document

Convert this Path to a URI.

Usage

From source file:com.twitter.algebra.nmf.ReindexerJob.java

License:Apache License

/**
 * Runs the reindexing job on {@code input} unless its output directory already exists.
 *
 * <p>The job output location is {@code tmpPath/label}. When that path is already present on
 * its file system the job is not started and a warning is logged instead.
 *
 * @param conf Hadoop configuration used to resolve the file system and to run the job
 * @param input path handed to the reindexing job as its input
 * @param tmpPath parent directory under which the output is placed
 * @param label name of the output directory below {@code tmpPath}
 * @return the value of the total-index counter narrowed to {@code int}, or {@code -1} when
 *     the output already exists and the job was skipped
 * @throws IOException if file system access fails or the job fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static int index(Configuration conf, Path input, Path tmpPath, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path target = new Path(tmpPath, label);
    final FileSystem targetFs = FileSystem.get(target.toUri(), conf);
    final ReindexerJob reindexer = new ReindexerJob();

    if (targetFs.exists(target)) {
        log.warn("----------- Skip already exists: " + target);
        return -1;
    }

    final Job finished = reindexer.run(conf, input, target);
    final long total = finished.getCounters()
            .getGroup(TOTALINDEX_COUNTER_GROUP)
            .findCounter(TOTALINDEX_COUNTER_NAME)
            .getValue();
    // Plain narrowing cast: counter values beyond the int range are truncated, not rejected.
    return (int) total;
}

From source file:com.twitter.algebra.nmf.ReindexerJob.java

License:Apache License

/**
 * Configures and runs the reindexing MapReduce job, blocking until it finishes.
 *
 * <p>The input is read with {@code KeyValueTextInputFormat} using a tab as the key/value
 * separator; the output is written with {@code SequenceFileOutputFormat} as
 * {@code LongWritable}/{@code IntWritable} pairs. The caller's configuration is copied, so
 * the separator setting does not leak back into {@code conf}.
 *
 * @param conf base Hadoop configuration; a private copy is used for the job
 * @param matrixInputPath input path, qualified against its own file system before use
 * @param matrixOutputPath output path; its last component is appended to the job name
 * @return the completed job
 * @throws IOException if the job reports failure or file system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration jobConf = new Configuration(conf);
    jobConf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    @SuppressWarnings("deprecation")
    final Job job = new Job(jobConf);
    job.setJarByClass(ReindexerJob.class);
    job.setJobName(ReindexerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    final FileSystem fs = FileSystem.get(matrixInputPath.toUri(), jobConf);
    final Path qualifiedInput = fs.makeQualified(matrixInputPath);
    final Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(KeyValueTextInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // Map stage.
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Reduce stage: a single reducer makes the reindexing very slow but is necessary to
    // have total order.
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);

    job.submit();
    final boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IOException("Job failed!");
    }
    return job;
}

From source file:com.twitter.algebra.nmf.RowSquareSumJob.java

License:Apache License

/**
 * Runs {@code RowSquareSumJob} over the rows of {@code A}, unless its output already exists,
 * and returns the output location.
 *
 * <p>The output path is {@code A.getOutputTempPath()/label}. If that path is already present
 * the job is skipped and a warning is logged. The original documentation describes the result
 * as the vector containing the sum of the rows of {@code A}; NOTE(review): the class name
 * suggests squared sums, confirm against {@code SumMapper}.
 *
 * @param conf base Hadoop configuration; a private copy is used
 * @param A matrix whose row path is used as the job input
 * @param label name of the output directory under the matrix's temp path
 * @return the path of the job output (whether freshly computed or already existing)
 * @throws IOException if file system access fails or the job fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static Path run(Configuration conf, DistributedRowMatrix A, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration localConf = new Configuration(conf);

    log.info("running " + RowSquareSumJob.class.getName());
    final Path resultPath = new Path(A.getOutputTempPath(), label);
    final FileSystem resultFs = FileSystem.get(resultPath.toUri(), localConf);
    final RowSquareSumJob sumJob = new RowSquareSumJob();

    if (resultFs.exists(resultPath)) {
        log.warn("----------- Skip already exists: " + resultPath);
    } else {
        sumJob.run(localConf, A.getRowPath(), resultPath, A.numRows());
    }
    return resultPath;
}

From source file:com.twitter.algebra.nmf.RowSquareSumJob.java

License:Apache License

/**
 * Configures and runs the row-sum MapReduce job, blocking until it finishes.
 *
 * <p>Reads the input with {@code SequenceFileInputFormat}, maps with {@code SumMapper}, and
 * merges with {@code MergeVectorsReducer} (used as both combiner and reducer) in a single
 * reduce task. Output is written with {@code MatrixOutputFormat} as
 * {@code IntWritable}/{@code VectorWritable} pairs.
 *
 * @param conf Hadoop configuration for the job
 * @param matrixInputPath input path, qualified against its own file system before use
 * @param matrixOutputPath output path; its last component is appended to the job name
 * @param aRows number of rows of the input matrix; currently unused because the row
 *     partitioner setup that consumed it is disabled
 * @throws IOException if the job reports failure or file system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows)
        throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("deprecation")
    final Job job = new Job(conf);
    job.setJarByClass(RowSquareSumJob.class);
    job.setJobName(RowSquareSumJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    final FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    final Path qualifiedInput = fs.makeQualified(matrixInputPath);
    final Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // Processing stages; everything funnels into one reduce task.
    job.setMapperClass(SumMapper.class);
    job.setCombinerClass(MergeVectorsReducer.class);
    job.setReducerClass(MergeVectorsReducer.class);
    job.setNumReduceTasks(1);

    job.submit();
    final boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.SampleColsJob.java

License:Apache License

/**
 * Runs {@code SampleColsJob} on {@code A}, unless its output already exists, and wraps the
 * output as a matrix.
 *
 * <p>The output path is {@code A.getOutputTempPath()/label}. If it is already present the
 * job is skipped and a warning is logged. The returned matrix is declared with the same row
 * and column counts as {@code A}.
 *
 * @param conf Hadoop configuration used for the job and set on the returned matrix
 * @param A matrix whose row path is the job input
 * @param sampleRate sampling rate forwarded to the job
 * @param label name of the output directory under the matrix's temp path
 * @return a matrix backed by the job output path
 * @throws IOException if file system access fails or the job fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, float sampleRate,
        String label) throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + SampleColsJob.class.getName());
    final Path sampledPath = new Path(A.getOutputTempPath(), label);
    final FileSystem sampledFs = FileSystem.get(sampledPath.toUri(), conf);
    final SampleColsJob sampler = new SampleColsJob();

    if (sampledFs.exists(sampledPath)) {
        log.warn("----------- Skip already exists: " + sampledPath);
    } else {
        sampler.run(conf, A.getRowPath(), A.numCols(), sampledPath, sampleRate);
    }

    final DistributedRowMatrix sampled =
            new DistributedRowMatrix(sampledPath, A.getOutputTempPath(), A.numRows(), A.numCols());
    sampled.setConf(conf);
    return sampled;
}

From source file:com.twitter.algebra.nmf.SampleColsJob.java

License:Apache License

/**
 * Configures and runs the map-only column-sampling job, blocking until it finishes.
 *
 * <p>The sampling rate and the column count are published to the mappers through the
 * {@code SAMPLERATE} and {@code COLS} configuration keys on a private copy of {@code conf}.
 * Input is read with {@code SequenceFileInputFormat}; output is written with
 * {@code MatrixOutputFormat} as {@code IntWritable}/{@code VectorWritable} pairs.
 *
 * @param conf base Hadoop configuration; a private copy is used for the job
 * @param matrixInputPath input path, qualified against its own file system before use
 * @param cols column count stored under {@code COLS}
 * @param matrixOutputPath output path; its last component is appended to the job name
 * @param sampleRate sampling rate stored under {@code SAMPLERATE}
 * @throws IOException if the job reports failure or file system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration jobConf = new Configuration(conf);
    jobConf.setFloat(SAMPLERATE, sampleRate);
    jobConf.setInt(COLS, cols);

    // Must happen before the Job is created so the settings are part of the job's configuration.
    final FileSystem fs = FileSystem.get(matrixInputPath.toUri(), jobConf);
    NMFCommon.setNumberOfMapSlots(jobConf, fs, matrixInputPath, "samplecol");

    @SuppressWarnings("deprecation")
    final Job job = new Job(jobConf);
    job.setJarByClass(SampleColsJob.class);
    job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    final Path qualifiedInput = fs.makeQualified(matrixInputPath);
    final Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // Map-only job: no reduce tasks.
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);

    job.submit();
    final boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.SampleRowsJob.java

License:Apache License

/**
 * Runs {@code SampleRowsJob} on {@code A}, unless its output already exists, and wraps the
 * output as a matrix.
 *
 * <p>The output path is {@code A.getOutputTempPath()/label}. If it is already present the
 * job is skipped and a warning is logged. The returned matrix is declared with the same row
 * and column counts as {@code A}.
 *
 * @param conf Hadoop configuration used for the job and set on the returned matrix
 * @param A matrix whose row path is the job input
 * @param sampleRate sampling rate forwarded to the job
 * @param label name of the output directory under the matrix's temp path
 * @return a matrix backed by the job output path
 * @throws IOException if file system access fails or the job fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, float sampleRate,
        String label) throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + SampleRowsJob.class.getName());
    final Path sampledPath = new Path(A.getOutputTempPath(), label);
    final FileSystem sampledFs = FileSystem.get(sampledPath.toUri(), conf);
    final SampleRowsJob sampler = new SampleRowsJob();

    if (sampledFs.exists(sampledPath)) {
        log.warn("----------- Skip already exists: " + sampledPath);
    } else {
        sampler.run(conf, A.getRowPath(), sampledPath, sampleRate);
    }

    final DistributedRowMatrix sampled =
            new DistributedRowMatrix(sampledPath, A.getOutputTempPath(), A.numRows(), A.numCols());
    sampled.setConf(conf);
    return sampled;
}

From source file:com.twitter.algebra.nmf.SampleRowsJob.java

License:Apache License

/**
 * Configures and runs the map-only row-sampling job, blocking until it finishes.
 *
 * <p>The sampling rate is published to the mappers through the {@code SAMPLERATE}
 * configuration key on a private copy of {@code conf}. Input is read with
 * {@code SequenceFileInputFormat}; output is written with {@code MatrixOutputFormat} as
 * {@code IntWritable}/{@code VectorWritable} pairs.
 *
 * @param conf base Hadoop configuration; a private copy is used for the job
 * @param matrixInputPath input path, qualified against its own file system before use
 * @param matrixOutputPath output path; its last component is appended to the job name
 * @param sampleRate sampling rate stored under {@code SAMPLERATE}
 * @throws IOException if the job reports failure or file system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Configuration jobConf = new Configuration(conf);
    jobConf.setFloat(SAMPLERATE, sampleRate);

    // Must happen before the Job is created so the settings are part of the job's configuration.
    final FileSystem fs = FileSystem.get(matrixInputPath.toUri(), jobConf);
    NMFCommon.setNumberOfMapSlots(jobConf, fs, matrixInputPath, "samplerows");

    @SuppressWarnings("deprecation")
    final Job job = new Job(jobConf);
    job.setJarByClass(SampleRowsJob.class);
    job.setJobName(SampleRowsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    final Path qualifiedInput = fs.makeQualified(matrixInputPath);
    final Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    // Input side.
    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // Map-only job: no reduce tasks.
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);

    job.submit();
    final boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.XtXJob.java

License:Apache License

/**
 * Runs the XtX job for {@code xMatrix}, unless its output already exists, and wraps the
 * output as a square matrix.
 *
 * <p>The output path is {@code tmpPath/XtX-<id>}. If it is already present the job is
 * skipped and a warning is logged. The returned matrix is declared as
 * {@code numCols x numCols} of the input.
 *
 * @param xMatrix input matrix; its row path and column count are passed to the job
 * @param xm currently unused by this method
 * @param tmpPath parent directory of the output and temp path of the returned matrix
 * @param conf Hadoop configuration used for the job and set on the returned matrix
 * @param id suffix that distinguishes the output directory
 * @return a matrix backed by the job output path
 * @throws IOException if file system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public DistributedRowMatrix computeXtX(DistributedRowMatrix xMatrix, Vector xm, Path tmpPath,
        Configuration conf, String id) throws IOException, InterruptedException, ClassNotFoundException {
    final Path productPath = new Path(tmpPath, "XtX-" + id);
    final FileSystem productFs = FileSystem.get(productPath.toUri(), conf);

    if (productFs.exists(productPath)) {
        log.warn("----------- Skip XtXjob - already exists: " + productPath);
    } else {
        // No xm path is supplied to the job; null is passed in its place.
        run(conf, xMatrix.getRowPath(), xMatrix.numCols(), null, productPath);
    }

    final int side = xMatrix.numCols();
    final DistributedRowMatrix product = new DistributedRowMatrix(productPath, tmpPath, side, side);
    product.setConf(conf);
    return product;
}

From source file:com.twitter.algebra.nmf.XtXJob.java

License:Apache License

/**
 * Configures and runs the XtX MapReduce job, blocking until it finishes.
 *
 * <p>The column count is published through the {@code MATRIXCOLS} configuration key on a
 * private copy of {@code conf}. Input is read with {@code SequenceFileInputFormat}; map and
 * final output are {@code IntWritable}/{@code VectorWritable} pairs, written with
 * {@code MatrixOutputFormat}.
 *
 * <p>A failed job is reported with an {@link IOException}, matching the other jobs in this
 * package. Previously the completion status was discarded, so callers could go on to use the
 * output of a failed job.
 *
 * @param conf base Hadoop configuration; a private copy is used for the job
 * @param matrixInputPath input path, qualified against its own file system before use
 * @param numCols column count stored under {@code MATRIXCOLS} and given to the row partitioner
 * @param xmPath currently unused; the code that stored it in the configuration is disabled
 * @param matrixOutputPath output path; its last component is appended to the job name
 * @throws IOException if the job reports failure or file system access fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public void run(Configuration conf, Path matrixInputPath, int numCols, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);

    conf.setInt(MATRIXCOLS, numCols);
    // Must happen before the Job is created so the settings are part of the job's configuration.
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, new Path[] { matrixInputPath }, "xtx");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName("XtXJob-" + matrixOutputPath.getName());
    job.setJarByClass(XtXJob.class);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "xtx");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numCols);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res) {
        // Surface the failure instead of returning as if the output were valid.
        throw new IOException("Job failed!");
    }
}