Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.getParent().

Prototype

public Path getParent()

Source Link

Document

Returns the parent of a path or null if at root.

Usage

From source file:org.apache.gobblin.util.PathUtils.java

License:Apache License

/**
 * Returns the root path for the specified path.
 *
 * @see Path/*from  w w  w  .  j  a  va2 s.c om*/
 */
public static Path getRootPath(Path path) {
    if (path.isRoot()) {
        return path;
    }
    return getRootPath(path.getParent());
}

From source file:org.apache.gobblin.util.recordcount.CompactionRecordCountProvider.java

License:Apache License

/**
 * Converts a {@link Path} produced by another {@link RecordCountProvider} into the form used by
 * this provider. Currently only paths from {@link IngestionRecordCountProvider} are supported.
 * The converted {@link Path} keeps the same parent directory and its file name will start with
 * {@link #M_OUTPUT_FILE_PREFIX}.
 *
 * @param path the path to convert
 * @param extension the extension passed on to the file-name construction
 * @param src the provider that produced {@code path}; also used to read its record count
 * @return {@code path} unchanged when {@code src} is of this provider's class, otherwise a new
 *         path under the same parent
 */
public Path convertPath(Path path, String extension, RecordCountProvider src) {
    final Class<?> sourceType = src.getClass();

    // Same provider type: nothing to convert.
    if (this.getClass().equals(sourceType)) {
        return path;
    }

    // Any provider other than the ingestion one is not supported.
    if (!sourceType.equals(IngestionRecordCountProvider.class)) {
        throw getNotImplementedException(src);
    }

    final long recordCount = src.getRecordCount(path);
    final String convertedName = constructFileName(M_OUTPUT_FILE_PREFIX, extension, recordCount);
    return new Path(path.getParent(), convertedName);
}

From source file:org.apache.gobblin.util.WriterUtils.java

License:Apache License

/**
 * Creates {@code path} with the given permission, first creating any missing ancestor
 * directories (each with the same permission), and optionally waits until the new directory
 * becomes visible.
 *
 * <p>Does nothing if {@code path} already exists.
 *
 * @param fs the file system to operate on
 * @param path the directory to create
 * @param perm the permission applied to {@code path} and to every ancestor created here
 * @param retrierConfig retry configuration used to wait for the directory to show up; when it is
 *        the {@code NO_RETRY_CONFIG} instance no waiting is performed
 * @throws IOException if the directory cannot be created, or if it is still not visible after
 *         the retries are exhausted (the underlying failure is attached as the cause)
 */
public static void mkdirsWithRecursivePermissionWithRetry(final FileSystem fs, final Path path,
        FsPermission perm, Config retrierConfig) throws IOException {

    if (fs.exists(path)) {
        return;
    }

    // Create missing ancestors first so that each one receives the requested permission.
    final Path parent = path.getParent();
    if (parent != null && !fs.exists(parent)) {
        mkdirsWithRecursivePermissionWithRetry(fs, parent, perm, retrierConfig);
    }

    if (!fs.mkdirs(path, perm)) {
        throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm));
    }

    // Identity comparison against the sentinel instance is kept as in the original code.
    if (retrierConfig != NO_RETRY_CONFIG) {
        // Wait until the directory is visible: on eventually consistent file systems such as
        // Amazon S3 it may not exist right after mkdirs() returns.
        Retryer<Void> retryer = RetryerFactory.newInstance(retrierConfig);

        try {
            retryer.call(() -> {
                if (!fs.exists(path)) {
                    throw new IOException(
                            "Path " + path + " does not exist however it should. Will wait more.");
                }
                return null;
            });
        } catch (Exception e) {
            // Chain the failure as the cause instead of flattening it into the message.
            throw new IOException("Path " + path + " does not exist however it should. Giving up...", e);
        }
    }

    // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly
    if (!fs.getFileStatus(path).getPermission().equals(perm)) {
        fs.setPermission(path, perm);
    }
}

From source file:org.apache.hadoop.example.ITUseMiniCluster.java

License:Apache License

/**
 * Writes {@code TEXT} to {@code TEST_PATH/FILENAME} on the given file system, reads it back and
 * asserts that the value read equals the value written.
 *
 * @param fs the file system implementation under test
 * @throws IOException if the parent directory cannot be created or any file operation fails
 */
public void simpleReadAfterWrite(final FileSystem fs) throws IOException {
    LOG.info("Testing read-after-write with FS implementation: {}", fs);

    final Path target = new Path(TEST_PATH, FILENAME);
    final boolean parentCreated = fs.mkdirs(target.getParent());
    if (!parentCreated) {
        throw new IOException("Mkdirs failed to create " + TEST_PATH);
    }

    // Write phase.
    try (FSDataOutputStream writer = fs.create(target)) {
        writer.writeUTF(TEXT);
    }

    // Read phase: the value must match what was just written.
    try (FSDataInputStream reader = fs.open(target)) {
        final String readBack = reader.readUTF();
        Assert.assertEquals("Didn't read back text we wrote.", TEXT, readBack);
    }
}

From source file:org.apache.hama.bsp.GroomServer.java

License:Apache License

/**
 * Prepares the local working files for the job that the given task belongs to and then launches
 * the task.
 *
 * <p>On the first task of a job (tracked through {@code rjob.localized}) the job file is copied
 * from the distributed file system to a local path, the job jar (if one is configured) is copied
 * and unpacked into a {@code work} directory next to the local job file, and the job is marked as
 * localized. For later tasks the job configuration is rebuilt from the already recorded job file.
 *
 * @param tip the task in progress to localize and launch
 * @throws IOException if a local directory cannot be created or a file copy fails
 */
private void localizeJob(TaskInProgress tip) throws IOException {
    Task task = tip.getTask();
    conf.addResource(task.getJobFile());
    BSPJob defaultJobConf = new BSPJob((HamaConfiguration) conf);
    Path localJobFile = defaultJobConf.getLocalPath(SUBDIR + "/" + task.getTaskID() + "/" + "job.xml");

    RunningJob rjob = addTaskToJob(task.getJobID(), localJobFile, tip);
    BSPJob jobConf = null;

    synchronized (rjob) {
        if (!rjob.localized) {
            FileSystem dfs = FileSystem.get(conf);
            FileSystem localFs = FileSystem.getLocal(conf);
            Path jobDir = localJobFile.getParent();
            // Start from an empty job directory if a previous one is still around.
            if (localFs.exists(jobDir)) {
                localFs.delete(jobDir, true);
                boolean created = localFs.mkdirs(jobDir);
                if (!created) {
                    throw new IOException("Not able to create job directory " + jobDir.toString());
                }
            }

            Path localJarFile = defaultJobConf.getLocalPath(SUBDIR + "/" + task.getTaskID() + "/" + "job.jar");

            Path jobFilePath = new Path(task.getJobFile());

            // Wait a while (up to 300 polls of 100 ms) for the job file to finish being written.
            // Short-circuit && avoids one extra exists() call once the poll budget is used up.
            for (int i = 0; i < 300 && !dfs.exists(jobFilePath); i++) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // NOTE(review): the interrupt is only logged and the thread's interrupt
                    // status is not restored; confirm whether that is intended.
                    LOG.warn("Sleep failed", e);
                }
            }

            dfs.copyToLocalFile(jobFilePath, localJobFile);

            // Fresh configuration built from the localized job file (distinct from the field).
            HamaConfiguration localizedConf = new HamaConfiguration();
            localizedConf.addResource(localJobFile);
            jobConf = new BSPJob(localizedConf, task.getJobID().toString());

            Path jarFile = null;
            if (jobConf.getJar() != null) {
                jarFile = new Path(jobConf.getJar());
            } else {
                LOG.warn("No jar file for job " + task.getJobID() + " has been defined!");
            }
            // Point the job at the local jar location regardless of whether a jar was defined.
            jobConf.setJar(localJarFile.toString());

            if (jarFile != null) {
                dfs.copyToLocalFile(jarFile, localJarFile);

                // also unjar the job.jar files in workdir
                File workDir = new File(new File(localJobFile.toString()).getParent(), "work");
                // mkdirs() returns false when the directory already exists, so only fail if it
                // is genuinely not there afterwards.
                if (!workDir.mkdirs() && !workDir.isDirectory()) {
                    throw new IOException("Mkdirs failed to create " + workDir.toString());
                }
                RunJar.unJar(new File(localJarFile.toString()), workDir);
            }
            rjob.localized = true;
        } else {
            HamaConfiguration localizedConf = new HamaConfiguration();
            localizedConf.addResource(rjob.getJobFile());
            jobConf = new BSPJob(localizedConf, rjob.getJobId().toString());
        }
    }

    launchTaskForJob(tip, jobConf);
}

From source file:org.apache.hama.examples.Kmeans.java

License:Apache License

/**
 * Command-line entry point of the K-Means example.
 *
 * <p>Expects either four arguments ({@code <INPUT_PATH> <OUTPUT_PATH> <MAXITERATIONS> <K>}) or
 * seven (the same four followed by {@code -g <COUNT> <DIMENSION OF VECTORS>}). With {@code -g}
 * the input is prepared through {@code KMeansBSP.prepareInput}; otherwise the input must be an
 * existing text file, which is converted through {@code KMeansBSP.prepareInputText}. The job is
 * then submitted and a few lines of its output are printed.
 *
 * @param args the command-line arguments described above
 * @throws Exception if argument parsing, input preparation or the job itself fails
 */
public static void main(String[] args) throws Exception {
    // Only exactly 4 or exactly 7 arguments are accepted.
    final boolean argCountOk = args.length == 4 || args.length == 7;
    if (!argCountOk) {
        System.out.println(
                "USAGE: <INPUT_PATH> <OUTPUT_PATH> <MAXITERATIONS> <K (how many centers)> -g [<COUNT> <DIMENSION OF VECTORS>]");
        return;
    }
    HamaConfiguration conf = new HamaConfiguration();

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);

    // The center file lives next to a file input, or inside a directory input.
    Path centerBase = fs.isFile(in) ? in.getParent() : in;
    Path center = new Path(centerBase, "center/cen.seq");
    Path centerOut = new Path(out, "center/center_output.seq");
    conf.set(KMeansBSP.CENTER_IN_PATH, center.toString());
    conf.set(KMeansBSP.CENTER_OUT_PATH, centerOut.toString());

    int iterations = Integer.parseInt(args[2]);
    conf.setInt(KMeansBSP.MAX_ITERATIONS_KEY, iterations);
    int k = Integer.parseInt(args[3]);

    final boolean generateInput = args.length == 7 && args[4].equals("-g");
    if (generateInput) {
        int count = Integer.parseInt(args[5]);
        if (k > count) {
            throw new IllegalArgumentException("K can't be greater than n!");
        }
        int dimension = Integer.parseInt(args[6]);
        System.out.println("N: " + count + " Dimension: " + dimension + " Iterations: " + iterations);
        if (!fs.isFile(in)) {
            in = new Path(in, "input.seq");
        }
        // prepare the input, like deleting old versions and creating centers
        KMeansBSP.prepareInput(count, k, dimension, conf, in, center, out, fs);
    } else {
        if (!fs.isFile(in)) {
            System.out.println("Cannot read text input file: " + in.toString());
            return;
        }
        // Set the last argument to TRUE if first column is required to be the key
        in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs, true);
    }

    BSPJob job = KMeansBSP.createJob(conf, in, out, true);

    // just submit the job and report how long it took
    long startTime = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }

    System.out.println("\nHere are a few lines of output:");
    List<String> outputLines = KMeansBSP.readOutput(conf, out, fs, 4);
    for (String outputLine : outputLines) {
        System.out.println(outputLine);
    }
    System.out.println("...");
}

From source file:org.apache.hama.examples.SpMV.java

License:Apache License

/**
 * SpMV produces a file, which contains result dense vector in format of pairs
 * of integer and double. The aim of this method is to convert SpMV output to
 * format usable in subsequent computation - dense vector. It can be usable
 * for iterative solvers. IMPORTANT: currently it is used in SpMV. It can be a
 * bottle neck, because all input needs to be stored in memory.
 *
 * <p>The result is written to a path named {@code result} next to the given
 * output directory.
 *
 * @param SpMVoutputPathString output path, which represents directory with
 *          part files.
 * @param conf configuration
 * @return path to output vector.
 * @throws IOException if listing the output directory, closing a reader or
 *           writing the result fails
 * @throws RuntimeException wrapping the {@link IOException} raised while
 *           opening or reading one of the part files
 */
public static String convertSpMVOutputToDenseVector(String SpMVoutputPathString, HamaConfiguration conf)
        throws IOException {
    List<Integer> indices = new ArrayList<Integer>();
    List<Double> values = new ArrayList<Double>();

    FileSystem fs = FileSystem.get(conf);
    Path SpMVOutputPath = new Path(SpMVoutputPathString);
    Path resultOutputPath = SpMVOutputPath.getParent().suffix("/result");
    FileStatus[] stats = fs.listStatus(SpMVOutputPath);
    for (FileStatus stat : stats) {
        // Only the path component is used; scheme and authority are dropped.
        Path partFile = new Path(stat.getPath().toUri().getPath());
        // The reader opens the file itself. The extra fs.open() call that used to
        // precede it returned a stream that was never closed and has been removed.
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, partFile, conf);
            IntWritable key = new IntWritable();
            DoubleWritable value = new DoubleWritable();
            while (reader.next(key, value)) {
                indices.add(key.get());
                values.add(value.get());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    DenseVectorWritable result = new DenseVectorWritable();
    result.setSize(indices.size());
    for (int i = 0; i < indices.size(); i++) {
        result.addCell(indices.get(i), values.get(i));
    }
    writeToFile(resultOutputPath.toString(), result, conf);
    return resultOutputPath.toString();
}

From source file:org.apache.hama.examples.util.WritableUtil.java

License:Apache License

/**
 * SpMV produces a file, which contains result dense vector in format of pairs
 * of integer and double. The aim of this method is to convert SpMV output to
 * format usable in subsequent computation - dense vector. It can be usable
 * for iterative solvers. IMPORTANT: currently it is used in SpMV. It can be a
 * bottle neck, because all input needs to be stored in memory.
 *
 * <p>The result is written to a path named {@code result} next to the given
 * output directory.
 *
 * @param SpMVoutputPathString
 *          output path, which represents directory with part files.
 * @param conf
 *          configuration
 * @return path to output vector.
 * @throws IOException
 *           if listing the output directory, closing a reader or writing the
 *           result fails
 * @throws RuntimeException
 *           wrapping the {@link IOException} raised while opening or reading
 *           one of the part files
 */
public static String convertSpMVOutputToDenseVector(String SpMVoutputPathString, Configuration conf)
        throws IOException {
    List<Integer> indices = new ArrayList<Integer>();
    List<Double> values = new ArrayList<Double>();

    FileSystem fs = FileSystem.get(conf);
    Path SpMVOutputPath = new Path(SpMVoutputPathString);
    Path resultOutputPath = SpMVOutputPath.getParent().suffix("/result");
    FileStatus[] stats = fs.listStatus(SpMVOutputPath);
    for (FileStatus stat : stats) {
        // Only the path component is used; scheme and authority are dropped.
        Path partFile = new Path(stat.getPath().toUri().getPath());
        // The reader opens the file itself. The extra fs.open() call that used to
        // precede it returned a stream that was never closed and has been removed.
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, partFile, conf);
            IntWritable key = new IntWritable();
            DoubleWritable value = new DoubleWritable();
            while (reader.next(key, value)) {
                indices.add(key.get());
                values.add(value.get());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    DenseVectorWritable result = new DenseVectorWritable();
    result.setSize(indices.size());
    for (int i = 0; i < indices.size(); i++) {
        result.addCell(indices.get(i), values.get(i));
    }
    writeToFile(resultOutputPath.toString(), result, conf);
    return resultOutputPath.toString();
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Reads a tab-separated input text file and writes it to a sequencefile.
 *
 * <p>Each input line becomes one vector. The first {@code k} vectors are
 * additionally written to the {@code center} file. Existing data at
 * {@code out}, {@code center} and the target sequence file is deleted first.
 * The reader and both writers are closed even when reading or parsing fails.
 *
 * @param k number of leading input vectors that are also written to
 *          {@code center}
 * @param conf configuration used to create the sequence file writers
 * @param txtIn the text input; the sequence file is placed under
 *          {@code textinput/in.seq} next to it when it is a file, or inside
 *          it otherwise
 * @param center path of the center sequence file to (re)create
 * @param out output path; deleted recursively if it exists
 * @param fs file system all paths are resolved against
 * @param hasKey true if first column is required to be the key.
 * @return the path of a sequencefile.
 * @throws IOException if deleting, reading or writing any of the files fails
 */
public static Path prepareInputText(int k, Configuration conf, Path txtIn, Path center, Path out, FileSystem fs,
        boolean hasKey) throws IOException {

    Path in;
    if (fs.isFile(txtIn)) {
        in = new Path(txtIn.getParent(), "textinput/in.seq");
    } else {
        in = new Path(txtIn, "textinput/in.seq");
    }

    if (fs.exists(out)) {
        fs.delete(out, true);
    }

    if (fs.exists(center)) {
        fs.delete(center, true);
    }

    if (fs.exists(in)) {
        fs.delete(in, true);
    }

    final NullWritable value = NullWritable.get();

    // Nested try/finally so that every resource opened so far is released even
    // if a later open, a parse error or an append throws.
    Writer centerWriter = new SequenceFile.Writer(fs, conf, center, VectorWritable.class, NullWritable.class);
    try {
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class,
                NullWritable.class, CompressionType.NONE);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(txtIn)));
            try {
                int i = 0;
                String line;
                while ((line = br.readLine()) != null) {
                    String[] split = line.split("\t");
                    int columnLength = split.length;
                    int indexPos = 0;
                    if (hasKey) {
                        // First column is the key, the remaining columns are the values.
                        columnLength = columnLength - 1;
                        indexPos++;
                    }

                    DenseDoubleVector vec = new DenseDoubleVector(columnLength);
                    for (int j = 0; j < columnLength; j++) {
                        vec.set(j, Double.parseDouble(split[j + indexPos]));
                    }

                    VectorWritable vector;
                    if (hasKey) {
                        NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
                        vector = new VectorWritable(named);
                    } else {
                        vector = new VectorWritable(vec);
                    }

                    dataWriter.append(vector, value);
                    // Only the first k vectors seed the center file.
                    if (k > i) {
                        centerWriter.append(vector, value);
                    }
                    i++;
                }
            } finally {
                br.close();
            }
        } finally {
            dataWriter.close();
        }
    } finally {
        centerWriter.close();
    }
    return in;
}

From source file:org.apache.hama.ml.kmeans.TestKMeansBSP.java

License:Apache License

/**
 * Generates text input split into {@code numBspTask} partitions and converts each partition
 * into a sequence file.
 *
 * <p>For every partition a text file {@code parts/part<N>/input.txt} is written with one
 * {@code i<TAB>i} line per point, converted with {@code KMeansBSP.prepareInputText}, moved to
 * {@code parts/part<N>.seq}, and the intermediate directories are deleted. Only partition 0 is
 * converted with the real {@code k} and {@code centerIn}; the others use {@code k = 0} and a
 * separate {@code _empty.seq} center path.
 *
 * @param k number of centers passed on for the first partition
 * @param conf configuration passed to the conversion
 * @param fs file system to write to
 * @param in base input directory; partitions are created under {@code in/parts}
 * @param centerIn center file path used for the first partition
 * @param out output path passed to the conversion
 * @param numBspTask number of partitions to generate
 * @return the {@code parts} directory
 * @throws IOException if any file operation fails
 */
private Path generateInputText(int k, Configuration conf, FileSystem fs, Path in, Path centerIn, Path out,
        int numBspTask) throws IOException {
    final int totalNumberOfPoints = 100;
    final int pointsPerPart = totalNumberOfPoints / numBspTask;
    final int lastPart = numBspTask - 1;
    final Path partsDir = new Path(in, "parts");

    for (int part = 0; part < numBspTask; part++) {
        final Path partIn = new Path(partsDir, "part" + part + "/input.txt");
        final BufferedWriter textOut = new BufferedWriter(new OutputStreamWriter(fs.create(partIn)));

        // The last partition runs up to and including totalNumberOfPoints.
        final int start = pointsPerPart * part;
        final int end = (lastPart == part) ? totalNumberOfPoints : start + pointsPerPart - 1;
        System.out.println("Partition " + part + ": from " + start + " to " + end);

        int point = start;
        while (point <= end) {
            textOut.append(point + "\t" + point + "\n");
            point++;
        }
        textOut.close();

        // Convert input text to sequence file; only the first partition seeds the real centers.
        final boolean firstPart = part == 0;
        final Path seqFile;
        if (firstPart) {
            seqFile = KMeansBSP.prepareInputText(k, conf, partIn, centerIn, out, fs, false);
        } else {
            final Path emptyCenter = new Path(centerIn + "_empty.seq");
            seqFile = KMeansBSP.prepareInputText(0, conf, partIn, emptyCenter, out, fs, false);
        }

        fs.moveFromLocalFile(seqFile, new Path(partsDir, "part" + part + ".seq"));
        fs.delete(seqFile.getParent(), true);
        fs.delete(partIn.getParent(), true);
    }

    return partsDir;
}