Example usage for org.apache.hadoop.fs FileSystem rename

List of usage examples for org.apache.hadoop.fs FileSystem rename

Introduction

On this page you can find example usages of the rename method of org.apache.hadoop.fs.FileSystem.

Prototype

public abstract boolean rename(Path src, Path dst) throws IOException;

Source Link

Document

Renames Path src to Path dst.

Usage

From source file:ApplicationMaster.java

License:Apache License

/**
 * Renames the script file at {@code scriptPath} to {@code renamedScriptPath}, performing the
 * rename inside {@code appSubmitterUgi.doAs(...)}.
 *
 * @param renamedScriptPath destination path of the script file
 * @throws IOException if the file system reports an error or the rename returns {@code false}
 * @throws InterruptedException propagated from {@code doAs}
 */
private void renameScriptFile(final Path renamedScriptPath) throws IOException, InterruptedException {
    appSubmitterUgi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws IOException {
            FileSystem fs = renamedScriptPath.getFileSystem(conf);
            // rename() signals failure through its boolean result; without this check the
            // success message below would be logged even though nothing was renamed.
            if (!fs.rename(new Path(scriptPath), renamedScriptPath)) {
                throw new IOException(
                        "Failed to rename script file " + scriptPath + " to " + renamedScriptPath);
            }
            return null;
        }
    });
    LOG.info("User " + appSubmitterUgi.getUserName() + " added suffix(.sh/.bat) to script file as "
            + renamedScriptPath);
}

From source file:acromusashi.stream.bolt.hdfs.HdfsPreProcessor.java

License:Open Source License

/**
 * ????<br>/* ww  w . j a v  a2s  .c om*/
 * ???????????
 * 
 * @param hdfs 
 * @param targetTmpPath ??
 * @param tmpSuffix ??
 */
private static void renameTmpFile(FileSystem hdfs, String targetTmpPath, String tmpSuffix) {
    String basePath = extractBasePath(targetTmpPath, tmpSuffix);

    boolean isFileExists = true;

    try {
        isFileExists = hdfs.exists(new Path(basePath));
    } catch (IOException ioex) {
        String logFormat = "Failed to search target file exists. Skip file rename. : TargetUri={0}";
        String logMessage = MessageFormat.format(logFormat, basePath);
        logger.warn(logMessage, ioex);
        return;
    }

    if (isFileExists) {
        String logFormat = "File exists renamed target. Skip file rename. : BeforeUri={0} , AfterUri={1}";
        String logMessage = MessageFormat.format(logFormat, targetTmpPath, basePath);
        logger.warn(logMessage);
    } else {
        try {
            hdfs.rename(new Path(targetTmpPath), new Path(basePath));
        } catch (IOException ioex) {
            String logFormat = "Failed to HDFS file rename. Skip rename file and continue preprocess. : BeforeUri={0} , AfterUri={1}";
            String logMessage = MessageFormat.format(logFormat, targetTmpPath, basePath);
            logger.warn(logMessage, ioex);
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU/*from  ww w.j  a  va2  s .co  m*/
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent());

    // Multiply Matrix
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);

        return out;
    }

    return null;
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License:Apache License

/**
 * Multiplies this matrix (A) by {@code other} (B) by running a hybrid BSP job and returns the
 * product.
 *
 * <p>The job is configured with {@code outPath.getParent()} as its output location. After a
 * successful run, the first file in that directory whose name starts with {@code part-} and
 * whose length exceeds 97 bytes is renamed to {@code outPath}.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param tileWidth tile width forwarded to the job configuration
 * @param isDebugging debugging flag forwarded to the job configuration
 * @return a DistributedRowMatrix containing the product, or {@code null} if the job did not
 *         complete successfully
 * @throws IOException if the column count of A differs from the row count of B, if a file
 *         system operation fails, or if the result file cannot be renamed to {@code outPath}
 * @throws ClassNotFoundException propagated from the job
 * @throws InterruptedException propagated from the job
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, int tileWidth,
        boolean isDebugging) throws IOException, ClassNotFoundException, InterruptedException {

    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Build MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent(), tileWidth, isDebugging);

    // Multiply Matrix
    if (!job.waitForCompletion(true)) {
        return null;
    }

    // Rename result file to output path
    Configuration conf = job.getConfiguration();
    FileSystem fs = outPath.getFileSystem(conf);
    for (FileStatus file : fs.listStatus(outPath.getParent())) {
        // NOTE(review): the 97-byte threshold presumably skips header-only part files - confirm.
        if (file.getPath().getName().startsWith("part-") && file.getLen() > 97) {
            // rename() reports failure via its return value; ignoring it would make the
            // matrix below point at a missing or stale file.
            if (!fs.rename(file.getPath(), outPath)) {
                throw new IOException("Failed to rename " + file.getPath() + " to " + outPath);
            }
            break;
        }
    }

    // Read resulting Matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
            other.numCols());
    out.setConf(conf);

    return out;
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBSP.java

License:Apache License

/**
 * Command-line entry point: generates two random matrices, multiplies them with the hybrid
 * BSP job, and verifies the result against {@code multiplyJava}.
 *
 * <p>Accepts either no arguments (built-in defaults are used) or exactly nine arguments:
 * numBspTask, numGpuBspTask, numRowsA, numColsA, numRowsB, numColsB, tileWidth,
 * GPUPercentage, debug. Any other argument count prints the usage text and returns.
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if the column count of A differs from the row count of B
 * @throws IllegalStateException if the BSP job does not complete successfully
 * @throws Exception any failure propagated from matrix generation, the job, or the file system
 */
public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int numRowsA = 4;// 1024;
    int numColsA = 4;// 1024;
    int numRowsB = 4;// 1024;
    int numColsB = 4;// 1024;
    int tileWidth = 32; // 32 * 32 = 1024 threads matches the blocksize
    int GPUPercentage = 100;
    boolean isDebugging = true;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 9) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            numRowsA = Integer.parseInt(args[2]);
            numColsA = Integer.parseInt(args[3]);
            numRowsB = Integer.parseInt(args[4]);
            numColsB = Integer.parseInt(args[5]);
            tileWidth = Integer.parseInt(args[6]);
            GPUPercentage = Integer.parseInt(args[7]);
            isDebugging = Boolean.parseBoolean(args[8]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=numRowsA | Number of rows of the first input matrix");
            System.out.println("    Argument4=numColsA | Number of columns of the first input matrix");
            System.out.println("    Argument5=numRowsB | Number of rows of the second input matrix");
            System.out.println("    Argument6=numColsB | Number of columns of the second input matrix");
            System.out.println("    Argument7=tileWidth | TileWidth denotes the size of a submatrix");
            System.out.println("    Argument8=GPUPercentage (percentage of input)");
            System.out.println("    Argument9=debug | Enable debugging (true|false)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean("hama.pipes.logging", false);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new IllegalArgumentException(
                "Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");
    }

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results
    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
            MATRIX_A_SPLITS_PATH, false, numBspTask, numGpuBspTask, GPUPercentage);

    // Matrix B is stored in transposed order
    List<Path> transposedMatrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB,
            numColsB, new Random(1337L), MATRIX_B_TRANSPOSED_PATH, true);

    // Execute MatrixMultiplication BSP Job
    long startTime = System.currentTimeMillis();

    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(conf,
            MATRIX_A_SPLITS_PATH, transposedMatrixBPaths.get(0), MATRIX_C_PATH, tileWidth, isDebugging);

    // Multiply Matrix
    if (!job.waitForCompletion(true)) {
        // Without a result matrix the verification below would fail with a
        // NullPointerException; fail fast with a meaningful message instead.
        throw new IllegalStateException("MatrixMultiplicationHybridBSP job failed, no result matrix to verify");
    }

    // Rename result file to output path
    Path matrixCOutPath = new Path(MATRIX_C_PATH + "/part0.seq");

    FileSystem fs = MATRIX_C_PATH.getFileSystem(conf);
    for (FileStatus file : fs.listStatus(MATRIX_C_PATH)) {
        // NOTE(review): the 97-byte threshold presumably skips header-only part files - confirm.
        if (file.getPath().getName().startsWith("part-") && file.getLen() > 97) {
            // rename() reports failure via its return value rather than an exception.
            if (!fs.rename(file.getPath(), matrixCOutPath)) {
                throw new java.io.IOException(
                        "Failed to rename " + file.getPath() + " to " + matrixCOutPath);
            }
            break;
        }
    }

    // Read resulting Matrix from HDFS
    DistributedRowMatrix matrixC = new DistributedRowMatrix(matrixCOutPath, MATRIX_C_PATH, numRowsA, numColsB);
    matrixC.setConf(conf);

    LOG.info("MatrixMultiplicationHybrid using Hama finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Create matrix A in one file for verification
    List<Path> matrixAPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA,
            new Random(42L), MATRIX_A_PATH, false);
    DistributedRowMatrix matrixA = new DistributedRowMatrix(matrixAPaths.get(0), CONF_INPUT_DIR, numRowsA,
            numColsA);
    matrixA.setConf(conf);

    // Create matrix B, NOT transposed for verification
    List<Path> matrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB,
            new Random(1337L), MATRIX_B_PATH, false);
    DistributedRowMatrix matrixB = new DistributedRowMatrix(matrixBPaths.get(0), CONF_INPUT_DIR, numRowsB,
            numColsB);
    matrixB.setConf(conf);

    // Verification
    DistributedRowMatrix matrixD = matrixA.multiplyJava(matrixB, MATRIX_D_PATH);
    if (matrixC.verify(matrixD)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");
    }

    if (isDebugging) {
        System.out.println("\nMatrix A:");
        matrixA.printDistributedRowMatrix();
        System.out.println("\nMatrix B:");
        matrixB.printDistributedRowMatrix();

        System.out.println("\nTransposedMatrix B:");
        // Load DistributedRowMatrix transposedMatrixB
        DistributedRowMatrix transposedMatrixB = new DistributedRowMatrix(transposedMatrixBPaths.get(0),
                CONF_INPUT_DIR, numColsB, numRowsB);
        transposedMatrixB.setConf(conf);
        transposedMatrixB.printDistributedRowMatrix();

        System.out.println("\nMatrix C:");
        matrixC.printDistributedRowMatrix();
        System.out.println("\nMatrix D:");
        matrixD.printDistributedRowMatrix();

        // Print out log files
        printOutput(conf);
    }
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java

License:Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU/*from  w w w  .jav a2  s .  co  m*/
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply Matrix with transposed one
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);

        return out;
    }

    return null;
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

/**
 * Multiplies this matrix (A) by {@code other} (B) by running a BSP job on the CPU or the GPU
 * and returns the product.
 *
 * <p>The job is configured with {@code outPath.getParent()} as its output location. After a
 * successful run, the first file in that directory whose name starts with {@code part-} and
 * whose length exceeds 97 bytes is renamed to {@code outPath}.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU {@code true} to build the GPU job, {@code false} to build the CPU job
 * @return a DistributedRowMatrix containing the product, or {@code null} if the job did not
 *         complete successfully
 * @throws IOException if the column count of A differs from the row count of B, if a file
 *         system operation fails, or if the result file cannot be renamed to {@code outPath}
 * @throws ClassNotFoundException propagated from the job
 * @throws InterruptedException propagated from the job
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Build MatrixMultiplication job configuration
    BSPJob job;
    if (useGPU) {
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    } else {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply Matrix
    if (!job.waitForCompletion(true)) {
        return null;
    }

    // Rename result file to output path
    Configuration conf = job.getConfiguration();
    FileSystem fs = outPath.getFileSystem(conf);
    for (FileStatus file : fs.listStatus(outPath.getParent())) {
        // NOTE(review): the 97-byte threshold presumably skips header-only part files - confirm.
        if (file.getPath().getName().startsWith("part-") && file.getLen() > 97) {
            // rename() reports failure via its return value; ignoring it would make the
            // matrix below point at a missing or stale file.
            if (!fs.rename(file.getPath(), outPath)) {
                throw new IOException("Failed to rename " + file.getPath() + " to " + outPath);
            }
            break;
        }
    }

    // Read resulting Matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
            other.numCols());
    out.setConf(conf);

    return out;
}

From source file:boa.datagen.MapFileGen.java

License:Apache License

/**
 * Arranges the sequence file(s) at {@code SEQ_FILE_PATH} into MapFile layout and runs
 * {@code MapFile.fix} on the resulting directory.
 *
 * <ul>
 * <li>If the path is a file already named {@code MapFile.DATA_FILE_NAME}, its parent directory
 * is fixed in place.</li>
 * <li>If the path is any other file, it is moved into a new directory carrying the file's
 * original name, stored there as {@code MapFile.DATA_FILE_NAME}, and that directory is
 * fixed.</li>
 * <li>Otherwise the path is listed, and the first entry that is a file is renamed to
 * {@code MapFile.DATA_FILE_NAME} and its directory is fixed.</li>
 * </ul>
 *
 * @param args unused
 * @throws Exception any failure propagated from the file system or {@code MapFile.fix}
 */
public static void main(String[] args) throws Exception {
    if (SEQ_FILE_PATH.isEmpty()) {
        System.out.println("Missing path to sequence file. Please specify it in the properties file.");
        return;
    }
    String base = "hdfs://boa-njt/";
    Configuration conf = new Configuration();
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    // Close the file system even when one of the operations below throws.
    try {
        Path path = new Path(SEQ_FILE_PATH);
        String name = path.getName();
        if (fs.isFile(path)) {
            if (name.equals(MapFile.DATA_FILE_NAME)) {
                MapFile.fix(fs, path.getParent(), Text.class, BytesWritable.class, false, conf);
            } else {
                // Move the file aside first so that a directory with the file's own name
                // can be created in its place.
                Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                fs.rename(path, dataFile);
                Path dir = new Path(path.getParent(), name);
                fs.mkdirs(dir);
                fs.rename(dataFile, new Path(dir, dataFile.getName()));
                MapFile.fix(fs, dir, Text.class, BytesWritable.class, false, conf);
            }
        } else {
            FileStatus[] files = fs.listStatus(path);
            for (FileStatus file : files) {
                path = file.getPath();
                if (fs.isFile(path)) {
                    // Only the first plain file found is converted.
                    Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                    fs.rename(path, dataFile);
                    MapFile.fix(fs, dataFile.getParent(), Text.class, BytesWritable.class, false, conf);
                    break;
                }
            }
        }
    } finally {
        fs.close();
    }
}

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Recursively moves {@code taskOutput} to its final location, as computed by
 * {@code getFinalPath}.
 *
 * <p>A file is renamed to its final path; if that rename fails, the final path is deleted and
 * the rename is attempted once more. For a directory, the final directory is created and every
 * child is processed recursively.
 *
 * @param conf job configuration; supplies the task id used in error messages
 * @param fs file system holding the task output
 * @param jobOutputDir job output directory passed to {@code getFinalPath}
 * @param taskOutput file or directory to move
 * @throws IOException if the earlier output cannot be deleted, if the retried rename fails,
 *         or if a file system call fails
 */
private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    final String currentTaskId = conf.get("mapred.task.id");

    if (fs.isFile(taskOutput)) {
        final Path destination = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        boolean moved = fs.rename(taskOutput, destination);
        if (!moved) {
            // First attempt failed: remove the earlier output and try exactly once more.
            boolean cleared = fs.delete(destination, true);
            if (!cleared) {
                throw new IOException("Failed to delete earlier output of task: " + currentTaskId);
            }
            moved = fs.rename(taskOutput, destination);
            if (!moved) {
                throw new IOException("Failed to save output of task: " + currentTaskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + destination);
        return;
    }

    if (!fs.getFileStatus(taskOutput).isDir()) {
        return;
    }

    final FileStatus[] children = fs.listStatus(taskOutput);
    final Path destinationDir = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
    fs.mkdirs(destinationDir);
    if (children == null) {
        return;
    }
    for (int i = 0; i < children.length; i++) {
        moveTaskOutputs(conf, fs, jobOutputDir, children[i].getPath());
    }
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Recursively moves {@code taskOutput} to its final location, as computed by
 * {@code getFinalPath}.
 *
 * <p>A file is renamed to its final path; if that rename fails, the final path is deleted and
 * the rename is attempted once more. For a directory, the final directory is created and every
 * child is processed recursively.
 *
 * @param conf configuration; supplies the task id ({@code mapred.task.id}, falling back to
 *        {@code mapreduce.task.id}) used in error messages
 * @param fs file system holding the task output
 * @param jobOutputDir job output directory passed to {@code getFinalPath}
 * @param taskOutput file or directory to move
 * @throws IOException if the earlier output cannot be deleted, if the retried rename fails,
 *         or if a file system call fails
 */
private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    final String fallbackTaskId = conf.get("mapreduce.task.id");
    final String currentTaskId = conf.get("mapred.task.id", fallbackTaskId);

    if (fs.isFile(taskOutput)) {
        final Path destination = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        boolean moved = fs.rename(taskOutput, destination);
        if (!moved) {
            // First attempt failed: remove the earlier output and try exactly once more.
            boolean cleared = fs.delete(destination, true);
            if (!cleared) {
                throw new IOException("Failed to delete earlier output of task: " + currentTaskId);
            }
            moved = fs.rename(taskOutput, destination);
            if (!moved) {
                throw new IOException("Failed to save output of task: " + currentTaskId);
            }
        }

        LOG.debug("Moved {} to {}", taskOutput, destination);
        return;
    }

    if (!fs.getFileStatus(taskOutput).isDir()) {
        return;
    }

    final FileStatus[] children = fs.listStatus(taskOutput);
    final Path destinationDir = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
    fs.mkdirs(destinationDir);
    if (children == null) {
        return;
    }
    for (int i = 0; i < children.length; i++) {
        moveTaskOutputs(conf, fs, jobOutputDir, children[i].getPath());
    }
}