List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
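Before the project examples below, here is a minimal, self-contained sketch of the basic call pattern; the HDFS URI and file name are hypothetical placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; getFileSystem() resolves the FileSystem that owns
        // this path from the path's scheme (hdfs://, file://, ...) and the
        // given Configuration.
        Path path = new Path("hdfs://localhost:9000/tmp/example.txt");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);

        // Use the resolved FileSystem to write the file and check it exists.
        try (FSDataOutputStream out = fs.create(path, true)) {
            out.writeUTF("hello");
        }
        System.out.println("exists: " + fs.exists(path));
    }
}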
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent());

    // Multiply the matrices
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param tileWidth tile width used by the multiplication kernel
 * @param isDebugging whether to enable debug output
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, int tileWidth,
        boolean isDebugging) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent(), tileWidth, isDebugging);

    // Multiply the matrices
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java
License:Apache License
@Override
public boolean load(String path, boolean lazy) {
    this.m_isLazyLoadModel = lazy;
    this.m_modelPath = path;

    if (!lazy) {
        Path dataPath = new Path(m_modelPath);
        Configuration conf = new Configuration();
        try {
            FileSystem fs = dataPath.getFileSystem(conf);
            LinkedList<Path> files = new LinkedList<Path>();
            if (!fs.exists(dataPath)) {
                this.m_isLazyLoadModel = false;
                this.m_modelPath = null;
                return false;
            }
            if (!fs.isFile(dataPath)) {
                for (int i = 0; i < 100000; i++) {
                    Path partFile = new Path(
                            m_modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
                    if (fs.exists(partFile)) {
                        files.add(partFile);
                    } else {
                        break;
                    }
                }
            } else {
                files.add(dataPath);
            }

            LOG.info("loading model from " + path);
            for (Path file : files) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
                Text key = new Text();
                PipesVectorWritable value = new PipesVectorWritable();
                String strKey = null;
                Long actualKey = null;
                String firstSymbol = null;

                while (reader.next(key, value)) {
                    strKey = key.toString();
                    firstSymbol = strKey.substring(0, 1);
                    try {
                        actualKey = Long.valueOf(strKey.substring(1));
                    } catch (Exception e) {
                        actualKey = Long.valueOf(0);
                    }

                    if (firstSymbol.equals(OnlineCF.DFLT_MODEL_ITEM_DELIM)) {
                        // LOG.info("loaded itemId: " + actualKey + " itemVector: "
                        // + value.getVector());
                        m_modelItemFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else if (firstSymbol.equals(OnlineCF.DFLT_MODEL_USER_DELIM)) {
                        // LOG.info("loaded userId: " + actualKey + " userVector: "
                        // + value.getVector());
                        m_modelUserFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else {
                        // unknown key prefix
                        continue;
                    }
                }
                reader.close();
            }

            LOG.info("loaded: " + m_modelUserFactorizedValues.size() + " users, "
                    + m_modelItemFactorizedValues.size() + " items");

            // for (Long user : m_modelUserFactorizedValues.keySet()) {
            //   LOG.info("userId: " + user + " userVector: "
            //       + m_modelUserFactorizedValues.get(user));
            // }
            // for (Long item : m_modelItemFactorizedValues.keySet()) {
            //   LOG.info("itemId: " + item + " itemVector: "
            //       + m_modelItemFactorizedValues.get(item));
            // }

        } catch (Exception e) {
            e.printStackTrace();
            this.m_isLazyLoadModel = false;
            this.m_modelPath = null;
            return false;
        }
    }
    return true;
}
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, int n, int maxVal) throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create one input file writer per BSP task
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, IntWritable.class,
                CompressionType.NONE);
    }

    // Write random values to the input files
    IntWritable key = new IntWritable();
    IntWritable value = new IntWritable();
    Random r = new Random();
    for (int i = 0; i < n; i++) {
        key.set(i);
        value.set(r.nextInt(maxVal));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(key, value);
        }
    }

    // Close the file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
}
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
static void printOutput(BSPJob job, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    FileStatus[] files = fs.listStatus(path);
    for (int i = 0; i < files.length; i++) {
        if (files[i].getLen() > 0) {
            System.out.println("File " + files[i].getPath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, files[i].getPath(), job.getConfiguration());
                NullWritable key = NullWritable.get();
                IntWritable value = new IntWritable();
                while (reader.next(key, value)) {
                    System.out.println("key: '" + key + "' value: '" + value.get() + "'\n");
                }
            } catch (IOException e) {
                // Not a SequenceFile; dump the raw bytes instead
                FSDataInputStream in = fs.open(files[i].getPath());
                IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                in.close();
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @param transposeMatrixA whether to transpose matrix A in a separate job first
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply the matrix with the transposed one
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply the matrices
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:authordetect.input.SingleBookReader.java
/**
 * @param inputSplit the split that defines the section of input to read
 * @param context the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();

    // Get the option from the configuration:
    // 0 for group by author, 1 for group by book
    int option = configuration.getInt("GROUP_OPTION", 0);

    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    // Initialize the start and end points of this split; if the split does not
    // begin at offset 0, skip the first (partial) line.
    start = split.getStart();
    end = start + split.getLength();
    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    start += lineReader.readLine(currentLine);

    prepareToScanBook(option);
}
From source file:azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java
License:Apache License
private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}
From source file:azkaban.jobtype.connectors.TeradataToHdfsJobRunnerMain.java
License:Apache License
private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}