List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:org.apache.gobblin.util.PathUtils.java
License:Apache License
/** * Returns the root path for the specified path. * * @see Path/*from w w w . j a va2 s.c om*/ */ public static Path getRootPath(Path path) { if (path.isRoot()) { return path; } return getRootPath(path.getParent()); }
From source file:org.apache.gobblin.util.recordcount.CompactionRecordCountProvider.java
License:Apache License
/** * This method currently supports converting the given {@link Path} from {@link IngestionRecordCountProvider}. * The converted {@link Path} will start with {@link #M_OUTPUT_FILE_PREFIX}. *///www. j a va 2 s. c o m public Path convertPath(Path path, String extension, RecordCountProvider src) { if (this.getClass().equals(src.getClass())) { return path; } else if (src.getClass().equals(IngestionRecordCountProvider.class)) { String newFileName = constructFileName(M_OUTPUT_FILE_PREFIX, extension, src.getRecordCount(path)); return new Path(path.getParent(), newFileName); } else { throw getNotImplementedException(src); } }
From source file:org.apache.gobblin.util.WriterUtils.java
License:Apache License
public static void mkdirsWithRecursivePermissionWithRetry(final FileSystem fs, final Path path, FsPermission perm, Config retrierConfig) throws IOException { if (fs.exists(path)) { return;// ww w .j ava 2 s . c o m } if (path.getParent() != null && !fs.exists(path.getParent())) { mkdirsWithRecursivePermissionWithRetry(fs, path.getParent(), perm, retrierConfig); } if (!fs.mkdirs(path, perm)) { throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm)); } if (retrierConfig != NO_RETRY_CONFIG) { //Wait until file is not there as it can happen the file fail to exist right away on eventual consistent fs like Amazon S3 Retryer<Void> retryer = RetryerFactory.newInstance(retrierConfig); try { retryer.call(() -> { if (!fs.exists(path)) { throw new IOException( "Path " + path + " does not exist however it should. Will wait more."); } return null; }); } catch (Exception e) { throw new IOException("Path " + path + "does not exist however it should. Giving up..." + e); } } // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly if (!fs.getFileStatus(path).getPermission().equals(perm)) { fs.setPermission(path, perm); } }
From source file:org.apache.hadoop.example.ITUseMiniCluster.java
License:Apache License
/**
 * Writes a UTF string to a file on the given file system and asserts that reading the file back
 * yields the same text.
 *
 * @param fs the file system implementation under test
 * @throws IOException if the parent directory cannot be created or any file operation fails
 */
public void simpleReadAfterWrite(final FileSystem fs) throws IOException {
    LOG.info("Testing read-after-write with FS implementation: {}", fs);

    final Path target = new Path(TEST_PATH, FILENAME);
    final boolean parentCreated = fs.mkdirs(target.getParent());
    if (!parentCreated) {
        throw new IOException("Mkdirs failed to create " + TEST_PATH);
    }

    // Write phase: the stream is closed before the file is opened for reading.
    try (FSDataOutputStream writer = fs.create(target)) {
        writer.writeUTF(TEXT);
    }

    // Read phase: compare what comes back with what was written.
    try (FSDataInputStream reader = fs.open(target)) {
        final String readBack = reader.readUTF();
        Assert.assertEquals("Didn't read back text we wrote.", TEXT, readBack);
    }
}
From source file:org.apache.hama.bsp.GroomServer.java
License:Apache License
/**
 * Makes the job files of the given task available on the local file system and launches the task.
 *
 * <p>The first task of a job to arrive (per {@code rjob.localized}) copies the job file and, when
 * the job defines one, the job jar to local disk and unpacks the jar into a {@code work}
 * directory next to the local job file. Later tasks of the same job only build their job
 * configuration from the already recorded job file. All of this happens while holding the monitor
 * of the job's {@code RunningJob} entry.
 *
 * @param tip the task in progress whose job should be localized and which is then launched
 * @throws IOException if a directory cannot be created or a file system operation fails
 */
private void localizeJob(TaskInProgress tip) throws IOException {
    Task task = tip.getTask();
    conf.addResource(task.getJobFile());
    BSPJob defaultJobConf = new BSPJob((HamaConfiguration) conf);
    Path localJobFile = defaultJobConf.getLocalPath(SUBDIR + "/" + task.getTaskID() + "/" + "job.xml");

    RunningJob rjob = addTaskToJob(task.getJobID(), localJobFile, tip);
    BSPJob jobConf = null;

    synchronized (rjob) {
        if (!rjob.localized) {
            FileSystem dfs = FileSystem.get(conf);
            FileSystem localFs = FileSystem.getLocal(conf);

            // Start from an empty job directory if one is left over from an earlier run.
            Path jobDir = localJobFile.getParent();
            if (localFs.exists(jobDir)) {
                localFs.delete(jobDir, true);
                boolean created = localFs.mkdirs(jobDir);
                if (!created) {
                    throw new IOException("Not able to create job directory " + jobDir.toString());
                }
            }

            Path localJarFile = defaultJobConf.getLocalPath(SUBDIR + "/" + task.getTaskID() + "/" + "job.jar");
            Path jobFilePath = new Path(task.getJobFile());

            // Wait a while (at most 300 polls, 100 ms apart) for the job file to finish being written.
            // The short-circuit && avoids an extra exists() call once the poll budget is used up.
            for (int attempt = 0; attempt < 300 && !dfs.exists(jobFilePath); attempt++) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    LOG.warn("Sleep failed", e);
                    // Restore the interrupt status for callers and stop waiting; with the flag set,
                    // every further sleep would throw immediately anyway.
                    Thread.currentThread().interrupt();
                    break;
                }
            }

            dfs.copyToLocalFile(jobFilePath, localJobFile);

            // Named differently from the 'conf' field on purpose: this configuration only
            // carries the localized job file.
            HamaConfiguration localizedConf = new HamaConfiguration();
            localizedConf.addResource(localJobFile);
            jobConf = new BSPJob(localizedConf, task.getJobID().toString());

            Path jarFile = null;
            if (jobConf.getJar() != null) {
                jarFile = new Path(jobConf.getJar());
            } else {
                LOG.warn("No jar file for job " + task.getJobID() + " has been defined!");
            }
            // From here on the job refers to the local copy of the jar.
            jobConf.setJar(localJarFile.toString());

            if (jarFile != null) {
                dfs.copyToLocalFile(jarFile, localJarFile);

                // also unjar the job.jar files in workdir
                File workDir = new File(new File(localJobFile.toString()).getParent(), "work");
                // mkdirs() also returns false when the directory already exists, which is fine.
                if (!workDir.mkdirs() && !workDir.isDirectory()) {
                    throw new IOException("Mkdirs failed to create " + workDir.toString());
                }
                RunJar.unJar(new File(localJarFile.toString()), workDir);
            }
            rjob.localized = true;
        } else {
            // Job already localized by an earlier task: just load its configuration.
            HamaConfiguration localizedConf = new HamaConfiguration();
            localizedConf.addResource(rjob.getJobFile());
            jobConf = new BSPJob(localizedConf, rjob.getJobId().toString());
        }
    }

    launchTaskForJob(tip, jobConf);
}
From source file:org.apache.hama.examples.Kmeans.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 4 || (args.length > 4 && args.length != 7)) { System.out.println(/*from w w w . j av a 2 s .c o m*/ "USAGE: <INPUT_PATH> <OUTPUT_PATH> <MAXITERATIONS> <K (how many centers)> -g [<COUNT> <DIMENSION OF VECTORS>]"); return; } HamaConfiguration conf = new HamaConfiguration(); Path in = new Path(args[0]); Path out = new Path(args[1]); FileSystem fs = FileSystem.get(conf); Path center = null; if (fs.isFile(in)) { center = new Path(in.getParent(), "center/cen.seq"); } else { center = new Path(in, "center/cen.seq"); } Path centerOut = new Path(out, "center/center_output.seq"); conf.set(KMeansBSP.CENTER_IN_PATH, center.toString()); conf.set(KMeansBSP.CENTER_OUT_PATH, centerOut.toString()); int iterations = Integer.parseInt(args[2]); conf.setInt(KMeansBSP.MAX_ITERATIONS_KEY, iterations); int k = Integer.parseInt(args[3]); if (args.length == 7 && args[4].equals("-g")) { int count = Integer.parseInt(args[5]); if (k > count) throw new IllegalArgumentException("K can't be greater than n!"); int dimension = Integer.parseInt(args[6]); System.out.println("N: " + count + " Dimension: " + dimension + " Iterations: " + iterations); if (!fs.isFile(in)) { in = new Path(in, "input.seq"); } // prepare the input, like deleting old versions and creating centers KMeansBSP.prepareInput(count, k, dimension, conf, in, center, out, fs); } else { if (!fs.isFile(in)) { System.out.println("Cannot read text input file: " + in.toString()); return; } // Set the last argument to TRUE if first column is required to be the key in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs, true); } BSPJob job = KMeansBSP.createJob(conf, in, out, true); long startTime = System.currentTimeMillis(); // just submit the job if (job.waitForCompletion(true)) { System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); } System.out.println("\nHere are a few lines of output:"); List<String> 
results = KMeansBSP.readOutput(conf, out, fs, 4); for (String line : results) { System.out.println(line); } System.out.println("..."); }
From source file:org.apache.hama.examples.SpMV.java
License:Apache License
/**
 * SpMV produces a file, which contains result dense vector in format of pairs
 * of integer and double. The aim of this method is to convert SpMV output to
 * format usable in subsequent computation - dense vector. It can be usable
 * for iterative solvers. IMPORTANT: currently it is used in SpMV. It can be a
 * bottleneck, because all input needs to be stored in memory.
 *
 * <p>The result is written to {@code result} appended to the parent of the given output path.
 *
 * @param SpMVoutputPathString output path, which represents directory with
 *          part files.
 * @param conf configuration
 * @return path to output vector.
 * @throws IOException if listing the output directory, closing a reader or writing the result fails
 * @throws RuntimeException wrapping the {@link IOException} raised while reading a part file
 */
public static String convertSpMVOutputToDenseVector(String SpMVoutputPathString, HamaConfiguration conf)
        throws IOException {
    List<Integer> indeces = new ArrayList<Integer>();
    List<Double> values = new ArrayList<Double>();

    FileSystem fs = FileSystem.get(conf);
    Path SpMVOutputPath = new Path(SpMVoutputPathString);
    Path resultOutputPath = SpMVOutputPath.getParent().suffix("/result");

    FileStatus[] stats = fs.listStatus(SpMVOutputPath);
    for (FileStatus stat : stats) {
        // Only the path component of the status URI is used to address the part file.
        Path partFile = new Path(stat.getPath().toUri().getPath());
        SequenceFile.Reader reader = null;
        // The part file is opened only through the reader; no separate stream is opened,
        // so nothing is left unclosed per part file.
        try {
            reader = new SequenceFile.Reader(fs, partFile, conf);
            IntWritable key = new IntWritable();
            DoubleWritable value = new DoubleWritable();
            while (reader.next(key, value)) {
                indeces.add(key.get());
                values.add(value.get());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    // Assemble the dense vector from the collected (index, value) pairs.
    DenseVectorWritable result = new DenseVectorWritable();
    result.setSize(indeces.size());
    for (int i = 0; i < indeces.size(); i++) {
        result.addCell(indeces.get(i), values.get(i));
    }

    writeToFile(resultOutputPath.toString(), result, conf);
    return resultOutputPath.toString();
}
From source file:org.apache.hama.examples.util.WritableUtil.java
License:Apache License
/**
 * SpMV produces a file, which contains result dense vector in format of pairs
 * of integer and double. The aim of this method is to convert SpMV output to
 * format usable in subsequent computation - dense vector. It can be usable
 * for iterative solvers. IMPORTANT: currently it is used in SpMV. It can be a
 * bottleneck, because all input needs to be stored in memory.
 *
 * <p>The result is written to {@code result} appended to the parent of the given output path.
 *
 * @param SpMVoutputPathString
 *          output path, which represents directory with part files.
 * @param conf
 *          configuration
 * @return path to output vector.
 * @throws IOException if listing the output directory, closing a reader or writing the result fails
 * @throws RuntimeException wrapping the {@link IOException} raised while reading a part file
 */
public static String convertSpMVOutputToDenseVector(String SpMVoutputPathString, Configuration conf)
        throws IOException {
    List<Integer> indeces = new ArrayList<Integer>();
    List<Double> values = new ArrayList<Double>();

    FileSystem fs = FileSystem.get(conf);
    Path SpMVOutputPath = new Path(SpMVoutputPathString);
    Path resultOutputPath = SpMVOutputPath.getParent().suffix("/result");

    FileStatus[] stats = fs.listStatus(SpMVOutputPath);
    for (FileStatus stat : stats) {
        // Only the path component of the status URI is used to address the part file.
        Path partFile = new Path(stat.getPath().toUri().getPath());
        SequenceFile.Reader reader = null;
        // The part file is opened only through the reader; no separate stream is opened,
        // so nothing is left unclosed per part file.
        try {
            reader = new SequenceFile.Reader(fs, partFile, conf);
            IntWritable key = new IntWritable();
            DoubleWritable value = new DoubleWritable();
            while (reader.next(key, value)) {
                indeces.add(key.get());
                values.add(value.get());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    // Assemble the dense vector from the collected (index, value) pairs.
    DenseVectorWritable result = new DenseVectorWritable();
    result.setSize(indeces.size());
    for (int i = 0; i < indeces.size(); i++) {
        result.addCell(indeces.get(i), values.get(i));
    }

    writeToFile(resultOutputPath.toString(), result, conf);
    return resultOutputPath.toString();
}
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
/**
 * Reads a tab-separated input text file and writes it to a sequencefile; the first {@code k}
 * vectors are additionally written to the center file.
 *
 * <p>Any existing {@code out}, {@code center} and target sequencefile are deleted first. The
 * reader and both writers are closed even when reading, parsing or appending fails.
 *
 * @param k number of leading input vectors to copy into the center file
 * @param conf configuration used to create the sequencefile writers
 * @param txtIn the text input; when it is a file the sequencefile is placed under its parent,
 *          otherwise under {@code txtIn} itself
 * @param center path of the center sequencefile to (re)create
 * @param out output path, deleted if it exists
 * @param fs file system all paths are resolved against
 * @param hasKey true if first column is required to be the key.
 * @return the path of a sequencefile.
 * @throws IOException if a file system operation fails
 */
public static Path prepareInputText(int k, Configuration conf, Path txtIn, Path center, Path out, FileSystem fs,
        boolean hasKey) throws IOException {
    Path in;
    if (fs.isFile(txtIn)) {
        in = new Path(txtIn.getParent(), "textinput/in.seq");
    } else {
        in = new Path(txtIn, "textinput/in.seq");
    }

    // Remove leftovers of earlier runs.
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    if (fs.exists(center)) {
        fs.delete(center, true);
    }
    if (fs.exists(in)) {
        fs.delete(in, true);
    }

    final NullWritable value = NullWritable.get();

    // Nested try/finally so that every resource opened so far is closed on any failure.
    Writer centerWriter = new SequenceFile.Writer(fs, conf, center, VectorWritable.class, NullWritable.class);
    try {
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class,
                NullWritable.class, CompressionType.NONE);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(txtIn)));
            try {
                int i = 0;
                String line;
                while ((line = br.readLine()) != null) {
                    VectorWritable vector = parseInputLine(line, hasKey);
                    dataWriter.append(vector, value);
                    // The first k vectors double as the initial centers.
                    if (k > i) {
                        centerWriter.append(vector, value);
                    }
                    i++;
                }
            } finally {
                br.close();
            }
        } finally {
            dataWriter.close();
        }
    } finally {
        centerWriter.close();
    }

    return in;
}

/**
 * Parses one tab-separated line into a vector; with {@code hasKey} the first column becomes the
 * vector's name and the remaining columns its values.
 */
private static VectorWritable parseInputLine(String line, boolean hasKey) {
    String[] split = line.split("\t");
    int indexPos = hasKey ? 1 : 0;
    int columnLength = split.length - indexPos;

    DenseDoubleVector vec = new DenseDoubleVector(columnLength);
    for (int j = 0; j < columnLength; j++) {
        vec.set(j, Double.parseDouble(split[j + indexPos]));
    }

    if (hasKey) {
        return new VectorWritable(new NamedDoubleVector(split[0], vec));
    }
    return new VectorWritable(vec);
}
From source file:org.apache.hama.ml.kmeans.TestKMeansBSP.java
License:Apache License
private Path generateInputText(int k, Configuration conf, FileSystem fs, Path in, Path centerIn, Path out, int numBspTask) throws IOException { int totalNumberOfPoints = 100; int interval = totalNumberOfPoints / numBspTask; Path parts = new Path(in, "parts"); for (int part = 0; part < numBspTask; part++) { Path partIn = new Path(parts, "part" + part + "/input.txt"); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(partIn))); int start = interval * part; int end = start + interval - 1; if ((numBspTask - 1) == part) { end = totalNumberOfPoints;/*from w w w. j a v a 2 s.com*/ } System.out.println("Partition " + part + ": from " + start + " to " + end); for (int i = start; i <= end; i++) { bw.append(i + "\t" + i + "\n"); } bw.close(); // Convert input text to sequence file Path seqFile = null; if (part == 0) { seqFile = KMeansBSP.prepareInputText(k, conf, partIn, centerIn, out, fs, false); } else { seqFile = KMeansBSP.prepareInputText(0, conf, partIn, new Path(centerIn + "_empty.seq"), out, fs, false); } fs.moveFromLocalFile(seqFile, new Path(parts, "part" + part + ".seq")); fs.delete(seqFile.getParent(), true); fs.delete(partIn.getParent(), true); } return parts; }