Example usage for org.apache.hadoop.fs Path getParent

Introduction

This page collects example usages of the org.apache.hadoop.fs.Path method getParent().

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
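
As a quick, hedged sketch of that contract (the paths below are invented for illustration):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/user/alice/data/part-00000");

        // getParent() drops the last path component.
        System.out.println(file.getParent()); // prints /user/alice/data

        // At the root there is no parent, so getParent() returns null.
        System.out.println(new Path("/").getParent()); // prints null
    }
}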

Usage

From source file: at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License: Apache License

public static List<Path> writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns,
        Path path, int numBspTask, int numGPUBspTask, int GPUPercentage) throws IOException {

    List<Path> splittedFiles = new ArrayList<Path>();

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    int inputVectorsPerGPUTask = 0;
    int inputVectorsPerCPU = 0;
    int inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (rows * GPUPercentage) / 100;
        inputVectorsPerCPU = rows - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = rows;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    for (int part = 0; part < numBspTask; part++) {

        Path partIn = new Path(path, "part" + part + ".seq");
        splittedFiles.add(partIn);
        FileSystem fs = FileSystem.get(conf);
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, IntWritable.class,
                VectorWritable.class, CompressionType.NONE);

        int interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        int start = interval * part;
        int end = start + interval;
        if ((numBspTask - 1) == part) {
            end = rows; // set to totalRows
        }
        LOG.info("Partition " + part + " file " + partIn.getParent().getName() + "/" + partIn.getName()
                + " from " + start + " to " + (end - 1));

        for (int i = start; i < end; i++) {
            DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]);
            dataWriter.append(new IntWritable(i), new VectorWritable(rowVector));
        }
        dataWriter.close();
    }

    return splittedFiles;
}

From source file: at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java

License: Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply Matrix with transposed one
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);

        return out;
    }

    return null;
}

From source file: at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java

License: Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());

    } else { // use GPU

        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply Matrix
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);

        return out;
    }

    return null;
}

From source file: azkaban.jobtype.javautils.HadoopUtils.java

License: Apache License

public static void saveProps(FileSystem fs, Props props, String file) throws IOException {
    Path path = new Path(file);

    // create directory if it does not exist.
    Path parent = path.getParent();
    if (!fs.exists(parent))
        fs.mkdirs(parent);

    // write out properties
    OutputStream output = fs.create(path);
    try {
        props.storeFlattened(output);
    } finally {
        output.close();
    }
}
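
A hedged usage sketch; the destination path is invented, and the Props setup assumes Azkaban's no-arg constructor and put(String, String) API:

Props props = new Props();
props.put("status", "complete"); // hypothetical entry for the example
saveProps(FileSystem.get(new Configuration()), props, "/tmp/jobs/output.properties");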

From source file: azkaban.viewer.hdfs.HdfsBrowserServlet.java

License: Apache License

private void getPathSegments(Path path, List<Path> paths, List<String> segments) {
    Path curr = path;
    while (curr.getParent() != null) {
        paths.add(curr);
        segments.add(curr.getName());
        curr = curr.getParent();
    }
    Collections.reverse(paths);
    Collections.reverse(segments);
}
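
A hedged sketch of what this produces for an invented input path, assuming the method were callable in scope:

List<Path> paths = new ArrayList<Path>();
List<String> segments = new ArrayList<String>();
getPathSegments(new Path("/user/alice/logs"), paths, segments);
// paths:    [/user, /user/alice, /user/alice/logs]
// segments: [user, alice, logs]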

From source file: azkaban.web.pages.HdfsBrowserServlet.java

License: Apache License

private void displayDir(HttpServletRequest req, HttpServletResponse resp, Path path) throws IOException {

    Page page = newPage(req, resp, "azkaban/web/pages/hdfs_browser_dir.vm");

    List<Path> paths = new ArrayList<Path>();
    List<String> segments = new ArrayList<String>();
    Path curr = path;
    while (curr.getParent() != null) {
        paths.add(curr);
        segments.add(curr.getName());
        curr = curr.getParent();
    }

    Collections.reverse(paths);
    Collections.reverse(segments);

    page.add("paths", paths);
    page.add("segments", segments);
    page.add("subdirs", _fs.listStatus(path)); // ??? line
    page.render();

}

From source file: babel.prep.datedcorpus.DatedLangFilesOutputFormat.java

License: Apache License

public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    } else {
        fs.mkdirs(dumpFile.getParent());
    }

    return new RecordWriter<Text, Text>() {
        public synchronized void write(Text key, Text versText) throws IOException {
            try {
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(new File(dumpFile.toUri()), true), DEFAULT_CHARSET));

                writer.write(versText.toString());
                writer.close();
            } catch (Exception e) {
                throw new RuntimeException("Error writing page versions: " + e.toString());
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
        }
    };
}

From source file: backup.namenode.NameNodeBackupBlockCheckProcessor.java

License: Apache License

public static boolean shouldIgnore(Set<Path> pathSetToIgnore, Path path) {
    do {
        if (pathSetToIgnore.contains(path)) {
            return true;
        }
        path = path.getParent();
    } while (path != null);
    return false;
}
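
Because the loop climbs through every ancestor, ignoring a directory also ignores everything beneath it. A minimal usage sketch with invented paths:

Set<Path> ignore = new HashSet<Path>();
ignore.add(new Path("/tmp"));
shouldIgnore(ignore, new Path("/tmp/job1/part-00000")); // true: /tmp is an ancestor
shouldIgnore(ignore, new Path("/data/input.seq"));      // false: no ancestor matches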

From source file: boa.datagen.MapFileGen.java

License: Apache License

public static void main(String[] args) throws Exception {
    if (SEQ_FILE_PATH.isEmpty()) {
        System.out.println("Missing path to sequence file. Please specify it in the properties file.");
        return;
    }
    String base = "hdfs://boa-njt/";
    Configuration conf = new Configuration();
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(SEQ_FILE_PATH);
    String name = path.getName();
    if (fs.isFile(path)) {
        if (path.getName().equals(MapFile.DATA_FILE_NAME)) {
            MapFile.fix(fs, path.getParent(), Text.class, BytesWritable.class, false, conf);
        } else {
            Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
            fs.rename(path, dataFile);
            Path dir = new Path(path.getParent(), name);
            fs.mkdirs(dir);
            fs.rename(dataFile, new Path(dir, dataFile.getName()));
            MapFile.fix(fs, dir, Text.class, BytesWritable.class, false, conf);
        }
    } else {
        FileStatus[] files = fs.listStatus(path);
        for (FileStatus file : files) {
            path = file.getPath();
            if (fs.isFile(path)) {
                Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                fs.rename(path, dataFile);
                MapFile.fix(fs, dataFile.getParent(), Text.class, BytesWritable.class, false, conf);
                break;
            }
        }
    }
    fs.close();
}

From source file: cascading.tap.hadoop.Hadoop18TapUtil.java

License: Open Source License

/**
 * Copies all files from the task output path to the output path.
 *
 * @param conf
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id");

    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);

    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;

        if (fs.exists(taskOutputPath)) {
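            // Under the old mapred API, taskOutputPath is typically
            // ${jobOutput}/_temporary/_${taskid}, so two getParent() calls
            // climb back up to the job output directory.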
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - "
                        + taskOutputPath);

            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}