Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.toUri().

Prototype

public URI toUri() 

Source Link

Document

Convert this Path to a URI.

Usage

From source file:com.twitter.algebra.nmf.CombinerJob.java

License:Apache License

/**
 * Configures the combiner MapReduce job, submits it, and blocks until it finishes.
 *
 * <p>The job reads {@code matrixInputPath} through {@code SequenceFileInputFormat}, maps with
 * {@code IdMapper}, reduces with {@code MergeVectorsReducer}, and writes
 * {@code IntWritable}/{@code VectorWritable} pairs to {@code matrixOutputPath} through
 * {@code MatrixOutputFormat}.
 *
 * @param conf base configuration; a copy is made and all settings are applied to the copy
 * @param matrixInputPath input location; qualified against its own file system before use
 * @param matrixOutputPath output location; qualified against the input path's file system
 * @param aRows value handed to {@code RowPartitioner.setPartitioner}
 * @throws IOException if the job reports failure, or propagated from the Hadoop calls
 * @throws InterruptedException propagated from job submission or waiting
 * @throws ClassNotFoundException propagated from job submission or waiting
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    // Output compression settings (including the LZO codec) are intentionally not applied here.
    jobConf.setInt("dfs.replication", 20);

    @SuppressWarnings("deprecation")
    Job hadoopJob = new Job(jobConf);
    hadoopJob.setJarByClass(CombinerJob.class);
    hadoopJob.setJobName(CombinerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    // Both paths are qualified against the file system that owns the input path.
    FileSystem fileSystem = FileSystem.get(matrixInputPath.toUri(), jobConf);
    Path qualifiedInput = fileSystem.makeQualified(matrixInputPath);
    Path qualifiedOutput = fileSystem.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(hadoopJob, qualifiedInput);
    hadoopJob.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(hadoopJob, qualifiedOutput);

    // TODO: make the number of reducers a parameter
    hadoopJob.setNumReduceTasks(NMFCommon.getNumberOfReduceSlots(jobConf, "combiner"));

    hadoopJob.setOutputFormatClass(MatrixOutputFormat.class);
    hadoopJob.setOutputKeyClass(IntWritable.class);
    hadoopJob.setOutputValueClass(VectorWritable.class);

    hadoopJob.setMapperClass(IdMapper.class);
    hadoopJob.setReducerClass(MergeVectorsReducer.class);

    RowPartitioner.setPartitioner(hadoopJob, RowPartitioner.IntRowPartitioner.class, aRows);

    hadoopJob.submit();
    if (!hadoopJob.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.CompositeDMJ.java

License:Apache License

/**
 * Validates the operand dimensions and, unless the output already exists, runs the
 * composite job. Refer to {@link CompositeDMJ} for further details.
 *
 * <p>The result is stored under {@code A.getOutputTempPath()} in a child named {@code label}.
 * If that path already exists the job is skipped and the existing data is wrapped instead.
 *
 * @param conf configuration used for file-system access, the job, and the returned matrix
 * @param A first operand; must match {@code B} in both row and column count
 * @param B second operand
 * @param inMemC square matrix whose row count must equal the column count of {@code A}
 * @param label name of the output directory below the temp path of {@code A}
 * @param alpha1 forwarded unchanged to the job
 * @param alpha2 forwarded unchanged to the job
 * @return a matrix over the output path with the row and column counts of {@code A}
 * @throws CardinalityException if any of the dimension checks fails
 * @throws IOException propagated from the file-system calls or the job
 * @throws InterruptedException propagated from the job
 * @throws ClassNotFoundException propagated from the job
 */
public static DistributedRowMatrix run(Configuration conf, DistributedRowMatrix A, DistributedRowMatrix B,
        DistributedRowMatrix inMemC, String label, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + CompositeDMJ.class.getName());

    // Dimension guards, checked in a fixed order so the first mismatch is the one reported.
    if (A.numRows() != B.numRows()) {
        throw new CardinalityException(A.numRows(), B.numRows());
    }
    if (A.numCols() != B.numCols()) {
        throw new CardinalityException(A.numCols(), B.numCols());
    }
    if (A.numCols() != inMemC.numRows()) {
        throw new CardinalityException(A.numCols(), inMemC.numRows());
    }
    if (inMemC.numCols() != inMemC.numRows()) {
        throw new CardinalityException(inMemC.numCols(), inMemC.numRows());
    }

    Path resultPath = new Path(A.getOutputTempPath(), label);
    FileSystem fileSystem = FileSystem.get(resultPath.toUri(), conf);
    CompositeDMJ runner = new CompositeDMJ();
    if (fileSystem.exists(resultPath)) {
        log.warn("----------- Skip already exists: " + resultPath);
    } else {
        runner.run(conf, A.getRowPath(), B.getRowPath(), resultPath, A.numRows(), inMemC.getRowPath(),
                inMemC.numRows(), inMemC.numCols(), alpha1, alpha2);
    }

    DistributedRowMatrix result = new DistributedRowMatrix(resultPath, A.getOutputTempPath(), A.numRows(),
            A.numCols());
    result.setConf(conf);
    return result;
}

From source file:com.twitter.algebra.nmf.CompositeDMJ.java

License:Apache License

/**
 * Picks the smaller of the two input matrices as the "map dir" side, launches the job with
 * the inputs ordered accordingly, and waits for it to finish.
 *
 * <p>Sizes are measured with {@code MapDir.du} on the file system of {@code aPath}; the
 * matrix at {@code aPath} is the map-dir side only when it is strictly smaller.
 *
 * @param conf configuration passed to the file system and to the job
 * @param aPath location of the first matrix
 * @param bPath location of the second matrix
 * @param matrixOutputPath where the job writes its result
 * @param atCols forwarded unchanged to the job
 * @param inMemCStr forwarded unchanged to the job
 * @param inMemCRows forwarded unchanged to the job
 * @param inMemCCols forwarded unchanged to the job
 * @param alpha1 forwarded unchanged to the job
 * @param alpha2 forwarded unchanged to the job
 * @throws IOException if the job reports failure, or propagated from the underlying calls
 * @throws InterruptedException propagated from the job
 * @throws ClassNotFoundException propagated from the job
 */
public void run(Configuration conf, Path aPath, Path bPath, Path matrixOutputPath, int atCols, String inMemCStr,
        int inMemCRows, int inMemCCols, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fileSystem = FileSystem.get(aPath.toUri(), conf);
    long atSize = MapDir.du(aPath, fileSystem);
    long bSize = MapDir.du(bPath, fileSystem);
    log.info("Choosing the smaller matrix: atSize: " + atSize + " bSize: " + bSize);

    boolean aIsMapDir = atSize < bSize;
    // The smaller matrix goes first (map-dir side); the other one is the regular job input.
    Path mapDirSide = aIsMapDir ? aPath : bPath;
    Path inputSide = aIsMapDir ? bPath : aPath;

    CompositeDMJ runner = new CompositeDMJ();
    Job hadoopJob = runner.run(conf, mapDirSide, inputSide, matrixOutputPath, atCols, aIsMapDir, inMemCStr,
            inMemCRows, inMemCCols, alpha1, alpha2);
    if (!hadoopJob.waitForCompletion(true)) {
        throw new IOException("Job failed! ");
    }
}

From source file:com.twitter.algebra.nmf.CompositeDMJ.java

License:Apache License

/**
 * Builds and submits the composite job without waiting for it to complete.
 *
 * <p>All job parameters are written into a copy of {@code conf} before the {@code Job} is
 * created. The job reads {@code matrixInputPaths} as sequence files, maps with
 * {@code MyMapper}, runs with zero reduce tasks, and writes
 * {@code IntWritable}/{@code VectorWritable} pairs through {@code MatrixOutputFormat}.
 *
 * @param conf base configuration; a copy is made and all settings are applied to the copy
 * @param mapDirPath stored in the configuration under {@code MAPDIRMATRIX}
 * @param matrixInputPaths job input; qualified against the output path's file system
 * @param matrixOutputPath job output; its URI selects the file system used here
 * @param atCols value handed to {@code RowPartitioner.setPartitioner}
 * @param aIsMapDir stored in the configuration under {@code AISMAPDIR}
 * @param inMemCStr stored in the configuration under {@code MATRIXINMEMORY}
 * @param inMemCRows stored in the configuration under {@code MATRIXINMEMORYROWS}
 * @param inMemCCols stored in the configuration under {@code MATRIXINMEMORYCOLS}
 * @param alpha1 stored in the configuration under {@code ALPHA1}
 * @param alpha2 stored in the configuration under {@code ALPHA2}
 * @return the submitted job; the caller is responsible for waiting on it
 * @throws IOException propagated from the Hadoop calls
 * @throws InterruptedException propagated from job submission
 * @throws ClassNotFoundException propagated from job submission
 */
public Job run(Configuration conf, Path mapDirPath, Path matrixInputPaths, Path matrixOutputPath, int atCols,
        boolean aIsMapDir, String inMemCStr, int inMemCRows, int inMemCCols, float alpha1, float alpha2)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.set(MATRIXINMEMORY, inMemCStr);
    jobConf.setInt(MATRIXINMEMORYROWS, inMemCRows);
    jobConf.setInt(MATRIXINMEMORYCOLS, inMemCCols);
    jobConf.setFloat(ALPHA1, alpha1);
    jobConf.setFloat(ALPHA2, alpha2);

    FileSystem fileSystem = FileSystem.get(matrixOutputPath.toUri(), jobConf);
    NMFCommon.setNumberOfMapSlots(jobConf, fileSystem, matrixInputPaths, "compositedmj");

    jobConf.set(MAPDIRMATRIX, mapDirPath.toString());
    jobConf.setBoolean(AISMAPDIR, aIsMapDir);

    // Every configuration write above must happen before the Job is created from jobConf.
    @SuppressWarnings("deprecation")
    Job hadoopJob = new Job(jobConf);
    hadoopJob.setJarByClass(CompositeDMJ.class);
    hadoopJob.setJobName(CompositeDMJ.class.getSimpleName() + "-" + matrixOutputPath.getName());

    Path qualifiedOutput = fileSystem.makeQualified(matrixOutputPath);
    Path qualifiedInput = fileSystem.makeQualified(matrixInputPaths);

    MultipleInputs.addInputPath(hadoopJob, qualifiedInput, SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(hadoopJob, qualifiedOutput);

    hadoopJob.setMapperClass(MyMapper.class);
    hadoopJob.setMapOutputKeyClass(IntWritable.class);
    hadoopJob.setMapOutputValueClass(VectorWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(hadoopJob, RowPartitioner.IntRowPartitioner.class, atCols);

    // The number of reduce tasks is set to zero for this job.
    hadoopJob.setNumReduceTasks(0);
    hadoopJob.setOutputFormatClass(MatrixOutputFormat.class);
    hadoopJob.setOutputKeyClass(IntWritable.class);
    hadoopJob.setOutputValueClass(VectorWritable.class);

    hadoopJob.submit();
    return hadoopJob;
}

From source file:com.twitter.algebra.nmf.DistRndMatrixJob.java

License:Apache License

/**
 * Produces a {@code rows} x {@code cols} matrix under {@code tmpPath} by running
 * {@code DistRndMatrixJob}, unless the output directory already exists.
 *
 * <p>The job input is a small placeholder file {@code null-in} below {@code tmpPath}; it is
 * created on demand with the content {@code "NullValue"}. The output directory is named
 * {@code Random-<label>-<rows>-<cols>}.
 *
 * @param conf configuration used for file-system access, the job, and the returned matrix
 * @param rows number of rows, passed to the job and to the returned matrix
 * @param cols number of columns, passed to the job and to the returned matrix
 * @param tmpPath parent directory of both the placeholder input and the output
 * @param label distinguishing part of the output directory name
 * @return a matrix over the output path, configured with {@code conf}
 * @throws IOException propagated from the file-system calls or the job
 * @throws InterruptedException propagated from the job
 * @throws ClassNotFoundException propagated from the job
 */
public static DistributedRowMatrix random(Configuration conf, int rows, int cols, Path tmpPath, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inputPath = new Path(tmpPath, "null-in");
    Path outputPath = new Path(tmpPath, "Random-" + label + "-" + rows + "-" + cols);
    FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
    DistRndMatrixJob job = new DistRndMatrixJob();
    if (!fs.exists(inputPath)) {
        FSDataOutputStream inFile = fs.create(inputPath);
        try {
            // Explicit charset so the bytes written do not depend on the platform default.
            inFile.write("NullValue".getBytes("UTF-8"));
        } finally {
            // Close even when the write fails so the stream is not leaked.
            inFile.close();
        }
    }
    if (!fs.exists(outputPath)) {
        job.run(conf, inputPath, outputPath, rows, cols);
    } else {
        log.warn("----------- Skip already exists: " + outputPath);
    }
    DistributedRowMatrix distRes = new DistributedRowMatrix(outputPath, tmpPath, rows, cols);
    distRes.setConf(conf);
    return distRes;
}

From source file:com.twitter.algebra.nmf.DistRndMatrixJob.java

License:Apache License

/**
 * Configures the random-matrix MapReduce job, submits it, and blocks until it finishes.
 *
 * <p>The requested dimensions are stored in a copy of {@code conf} under {@code ROWS} and
 * {@code COLS}. The job reads {@code inPath} as text, maps with {@code MyMapper}
 * ({@code IntWritable}/{@code NullWritable} map output), reduces with {@code MyReducer}, and
 * writes {@code IntWritable}/{@code VectorWritable} pairs through {@code MatrixOutputFormat}.
 *
 * @param conf base configuration; a copy is made and all settings are applied to the copy
 * @param inPath job input; qualified against its own file system before use
 * @param matrixOutputPath job output; qualified against the input path's file system
 * @param numInputRows stored under {@code ROWS} and handed to the row partitioner
 * @param numInputCols stored under {@code COLS}
 * @throws IOException if the job reports failure, or propagated from the Hadoop calls
 * @throws InterruptedException propagated from job submission or waiting
 * @throws ClassNotFoundException propagated from job submission or waiting
 */
public void run(Configuration conf, Path inPath, Path matrixOutputPath, int numInputRows, int numInputCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.setInt(ROWS, numInputRows);
    jobConf.setInt(COLS, numInputCols);

    @SuppressWarnings("deprecation")
    Job hadoopJob = new Job(jobConf);
    hadoopJob.setJarByClass(DistRndMatrixJob.class);
    hadoopJob.setJobName(DistRndMatrixJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fileSystem = FileSystem.get(inPath.toUri(), jobConf);
    Path qualifiedInput = fileSystem.makeQualified(inPath);
    Path qualifiedOutput = fileSystem.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(hadoopJob, qualifiedInput);
    hadoopJob.setInputFormatClass(TextInputFormat.class);
    FileOutputFormat.setOutputPath(hadoopJob, qualifiedOutput);

    hadoopJob.setMapperClass(MyMapper.class);
    hadoopJob.setMapOutputKeyClass(IntWritable.class);
    hadoopJob.setMapOutputValueClass(NullWritable.class);

    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(hadoopJob, RowPartitioner.IntRowPartitioner.class, numInputRows);

    hadoopJob.setNumReduceTasks(NMFCommon.getNumberOfReduceSlots(jobConf, "random"));

    hadoopJob.setReducerClass(MyReducer.class);
    hadoopJob.setOutputFormatClass(MatrixOutputFormat.class);
    hadoopJob.setOutputKeyClass(IntWritable.class);
    hadoopJob.setOutputValueClass(VectorWritable.class);

    hadoopJob.submit();
    if (!hadoopJob.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.Edge2MapDirJob.java

License:Apache License

/**
 * Re-indexes the input with {@code ReindexerJob} and then, unless the output directory
 * already exists, runs {@code Edge2MapDirJob} on it.
 *
 * <p>The total index returned by the re-indexing step is used as both the row and the
 * column count of the job, i.e. the input matrix is assumed to be square.
 *
 * @param conf configuration passed to the re-indexer, the file system, and the job
 * @param input location of the input data
 * @param tmpPath parent directory of the index and of the output
 * @param label name of the output directory below {@code tmpPath}
 * @param name base name from which {@code ReindexerJob.getName} derives the index name
 * @throws IOException propagated from the file-system calls or the jobs
 * @throws InterruptedException propagated from the jobs
 * @throws ClassNotFoundException propagated from the jobs
 */
public static void format(Configuration conf, Path input, Path tmpPath, String label, String name)
        throws IOException, InterruptedException, ClassNotFoundException {
    int totalIndex = ReindexerJob.index(conf, input, tmpPath, ReindexerJob.getName(name));
    Path indexPath = new Path(tmpPath, ReindexerJob.getName(name));

    // TODO: here we assume that input matrix is square
    Path resultPath = new Path(tmpPath, label);
    FileSystem fileSystem = FileSystem.get(resultPath.toUri(), conf);
    Edge2MapDirJob runner = new Edge2MapDirJob();
    if (fileSystem.exists(resultPath)) {
        log.warn("----------- Skip already exists: " + resultPath);
        return;
    }
    runner.run(conf, input, resultPath, totalIndex, totalIndex, indexPath.toString());
}

From source file:com.twitter.algebra.nmf.Edge2MapDirJob.java

License:Apache License

/**
 * Configures the edge-to-matrix MapReduce job, submits it, and blocks until it finishes.
 *
 * <p>The index name and the dimensions are stored in a copy of {@code conf}, and the
 * key/value separator of the line record reader is set to a tab. The job reads
 * {@code matrixInputPath} with {@code KeyValueTextInputFormat}, maps with {@code MyMapper},
 * combines with {@code MergeVectorsCombiner}, reduces with {@code MergeVectorsReducer}, and
 * writes {@code IntWritable}/{@code VectorWritable} pairs through {@code MatrixOutputFormat}.
 *
 * @param conf base configuration; a copy is made and all settings are applied to the copy
 * @param matrixInputPath job input; its URI selects the file system used here
 * @param matrixOutputPath job output; qualified against the input path's file system
 * @param numInputRows stored under {@code ROWS} and handed to the row partitioner
 * @param numInputCols stored under {@code COLS}
 * @param name stored under {@code INDEXNAME}
 * @throws IOException if the job reports failure, or propagated from the Hadoop calls
 * @throws InterruptedException propagated from job submission or waiting
 * @throws ClassNotFoundException propagated from job submission or waiting
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int numInputRows,
        int numInputCols, String name) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.set(INDEXNAME, name);
    jobConf.setInt(ROWS, numInputRows);
    jobConf.setInt(COLS, numInputCols);
    jobConf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");

    FileSystem fileSystem = FileSystem.get(matrixInputPath.toUri(), jobConf);
    NMFCommon.setNumberOfMapSlots(jobConf, fileSystem, matrixInputPath, "edge2matrix");

    // Every configuration write above must happen before the Job is created from jobConf.
    @SuppressWarnings("deprecation")
    Job hadoopJob = new Job(jobConf);
    hadoopJob.setJarByClass(Edge2MapDirJob.class);
    hadoopJob.setJobName(Edge2MapDirJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    Path qualifiedInput = fileSystem.makeQualified(matrixInputPath);
    Path qualifiedOutput = fileSystem.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(hadoopJob, qualifiedInput);
    hadoopJob.setInputFormatClass(KeyValueTextInputFormat.class);
    FileOutputFormat.setOutputPath(hadoopJob, qualifiedOutput);

    hadoopJob.setMapperClass(MyMapper.class);
    hadoopJob.setMapOutputKeyClass(IntWritable.class);
    hadoopJob.setMapOutputValueClass(VectorWritable.class);

    hadoopJob.setNumReduceTasks(NMFCommon.getNumberOfReduceSlots(jobConf, "edge2matrix"));
    // ensures total order (when used with {@link MatrixOutputFormat}),
    RowPartitioner.setPartitioner(hadoopJob, RowPartitioner.IntRowPartitioner.class, numInputRows);

    hadoopJob.setCombinerClass(MergeVectorsCombiner.class);
    hadoopJob.setReducerClass(MergeVectorsReducer.class);
    hadoopJob.setOutputFormatClass(MatrixOutputFormat.class);
    hadoopJob.setOutputKeyClass(IntWritable.class);
    hadoopJob.setOutputValueClass(VectorWritable.class);

    hadoopJob.submit();
    if (!hadoopJob.waitForCompletion(true)) {
        throw new IOException("Job failed!");
    }
}

From source file:com.twitter.algebra.nmf.ErrDMJ.java

License:Apache License

/**
 * Validates the operand dimensions, runs the error job unless its output already exists,
 * and logs per-column error statistics read back from the output.
 *
 * <p>The output is stored under {@code A.getOutputTempPath()} in a child named
 * {@code label}. After the job (or the skip), the output is loaded as a sparse vector and,
 * for every non-zero entry, {@code sqrt(entry / xColSumVec[index])} is logged together with
 * the maximum and the average of those values.
 *
 * @param conf configuration used for file-system access, the job, and reading the output
 * @param X matrix whose row count must equal that of {@code A} and whose column count must
 *        equal the row count of {@code Yt}
 * @param xColSumVec per-column denominators used when computing the relative column error
 * @param A matrix whose column count must equal that of {@code Yt}
 * @param Yt third operand
 * @param label name of the output directory below the temp path of {@code A}
 * @return the value of the job counter {@code "Result"/"sumAbs"}, or {@code -1} when the
 *         output already existed and the job was skipped
 * @throws CardinalityException if any of the dimension checks fails
 * @throws IOException propagated from the file-system calls or the job
 * @throws InterruptedException propagated from the job
 * @throws ClassNotFoundException propagated from the job
 */
public static long run(Configuration conf, DistributedRowMatrix X, Vector xColSumVec, DistributedRowMatrix A,
        DistributedRowMatrix Yt, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + ErrDMJ.class.getName());
    if (X.numRows() != A.numRows()) {
        // Report the two values that were actually compared.
        throw new CardinalityException(X.numRows(), A.numRows());
    }
    if (A.numCols() != Yt.numCols()) {
        throw new CardinalityException(A.numCols(), Yt.numCols());
    }
    if (X.numCols() != Yt.numRows()) {
        throw new CardinalityException(X.numCols(), Yt.numRows());
    }
    Path outPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    ErrDMJ job = new ErrDMJ();
    long totalErr = -1;
    if (!fs.exists(outPath)) {
        Job hJob = job.run(conf, X.getRowPath(), A.getRowPath(), Yt.getRowPath(), outPath, A.numRows(),
                Yt.numRows(), Yt.numCols());
        Counters counters = hJob.getCounters();
        // Keep the counter value; it is both logged below and returned to the caller.
        totalErr = counters.findCounter("Result", "sumAbs").getValue();
        log.info("FINAL ERR is " + totalErr);
    } else {
        log.warn("----------- Skip already exists: " + outPath);
    }
    Vector sumErrVec = AlgebraCommon.mapDirToSparseVector(outPath, 1, X.numCols(), conf);
    // Column errors are square roots, so 0 is the natural lower bound for the running maximum
    // (Double.MIN_VALUE is the smallest positive double, not the most negative one).
    double maxColErr = 0.0;
    double sumColErr = 0;
    int cntColErr = 0;
    for (Vector.Element el : sumErrVec.nonZeroes()) {
        double errP2 = el.get();
        double origP2 = xColSumVec.get(el.index());
        double colErr = Math.sqrt(errP2 / origP2);
        log.info("col: " + el.index() + " sum(err^2): " + errP2 + " sum(val^2): " + origP2 + " colErr: "
                + colErr);
        maxColErr = Math.max(colErr, maxColErr);
        sumColErr += colErr;
        cntColErr++;
    }
    log.info(" Max Col Err: " + maxColErr);
    // Evaluates to NaN (0.0 / 0) when the vector has no non-zero entries.
    log.info(" Avg Col Err: " + sumColErr / cntColErr);
    return totalErr;
}

From source file:com.twitter.algebra.nmf.ErrDMJ.java

License:Apache License

/**
 * Configures the error MapReduce job, submits it, waits for completion, and returns it.
 *
 * <p>The locations of X and Yt and the dimensions of Yt are written into a copy of
 * {@code conf} before the {@code Job} is created. The job reads {@code matrixAInputPath} as
 * sequence files, maps with {@code MyMapper}, uses {@code SumVectorsReducer} as both
 * combiner and reducer with a single reduce task, and writes
 * {@code IntWritable}/{@code VectorWritable} pairs through {@code MatrixOutputFormat}.
 *
 * @param conf base configuration; a copy is made and all settings are applied to the copy
 * @param xPath stored in the configuration under {@code MAPDIRMATRIXX}
 * @param matrixAInputPath job input; qualified against the output path's file system
 * @param ytPath stored in the configuration under {@code MAPDIRMATRIXYT}
 * @param outPath job output; its URI selects the file system used here
 * @param aRows not used by this method
 * @param ytRows stored in the configuration under {@code YTROWS}
 * @param ytCols stored in the configuration under {@code YTCOLS}
 * @return the completed job, e.g. for reading its counters
 * @throws IOException if the job reports failure, or propagated from the Hadoop calls
 * @throws InterruptedException propagated from job submission or waiting
 * @throws ClassNotFoundException propagated from job submission or waiting
 */
public Job run(Configuration conf, Path xPath, Path matrixAInputPath, Path ytPath, Path outPath, int aRows,
        int ytRows, int ytCols) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration jobConf = new Configuration(conf);
    jobConf.set(MAPDIRMATRIXX, xPath.toString());
    jobConf.set(MAPDIRMATRIXYT, ytPath.toString());
    jobConf.setInt(YTROWS, ytRows);
    jobConf.setInt(YTCOLS, ytCols);

    FileSystem fileSystem = FileSystem.get(outPath.toUri(), jobConf);
    NMFCommon.setNumberOfMapSlots(jobConf, fileSystem, matrixAInputPath, "err");

    // Every configuration write above must happen before the Job is created from jobConf.
    @SuppressWarnings("deprecation")
    Job hadoopJob = new Job(jobConf);
    hadoopJob.setJarByClass(ErrDMJ.class);
    hadoopJob.setJobName(ErrDMJ.class.getSimpleName() + "-" + outPath.getName());

    Path qualifiedInput = fileSystem.makeQualified(matrixAInputPath);
    MultipleInputs.addInputPath(hadoopJob, qualifiedInput, SequenceFileInputFormat.class);

    Path qualifiedOutput = fileSystem.makeQualified(outPath);
    FileOutputFormat.setOutputPath(hadoopJob, qualifiedOutput);

    hadoopJob.setMapperClass(MyMapper.class);
    hadoopJob.setMapOutputKeyClass(IntWritable.class);
    hadoopJob.setMapOutputValueClass(VectorWritable.class);

    // A single reduce task is configured for this job.
    hadoopJob.setNumReduceTasks(1);
    hadoopJob.setCombinerClass(SumVectorsReducer.class);
    hadoopJob.setReducerClass(SumVectorsReducer.class);

    hadoopJob.setOutputFormatClass(MatrixOutputFormat.class);
    hadoopJob.setOutputKeyClass(IntWritable.class);
    hadoopJob.setOutputValueClass(VectorWritable.class);

    hadoopJob.submit();
    if (!hadoopJob.waitForCompletion(true)) {
        throw new IOException("Job failed! ");
    }
    return hadoopJob;
}