List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
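Before the project examples below, here is a minimal standalone sketch of the method's basic behavior (the class name and the example path are arbitrary, chosen only for illustration): getParent() returns the path's containing directory, and returns null once the root is reached, which is why the loops in the examples below use null as their termination condition.

import org.apache.hadoop.fs.Path;

public class PathGetParentExample {
    public static void main(String[] args) {
        Path file = new Path("/user/hadoop/data/part-00000");

        // Parent of a file path is its containing directory
        Path dir = file.getParent();          // /user/hadoop/data

        // Calls can be chained to walk up the hierarchy
        Path grandParent = dir.getParent();   // /user/hadoop

        // At the root, getParent() returns null, so upward walks
        // use that as the stopping condition
        Path curr = file;
        while (curr != null) {
            System.out.println(curr);
            curr = curr.getParent();
        }
    }
}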
From source file: at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License: Apache License
public static List<Path> writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns,
        Path path, int numBspTask, int numGPUBspTask, int GPUPercentage) throws IOException {
    List<Path> splittedFiles = new ArrayList<Path>();

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    int inputVectorsPerGPUTask = 0;
    int inputVectorsPerCPU = 0;
    int inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (rows * GPUPercentage) / 100;
        inputVectorsPerCPU = rows - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = rows;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(path, "part" + part + ".seq");
        splittedFiles.add(partIn);

        FileSystem fs = FileSystem.get(conf);
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, IntWritable.class,
                VectorWritable.class, CompressionType.NONE);

        int interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        int start = interval * part;
        int end = start + interval;
        if ((numBspTask - 1) == part) {
            end = rows; // set to totalRows
        }

        LOG.info("Partition " + part + " file " + partIn.getParent().getName() + "/" + partIn.getName()
                + " from " + start + " to " + (end - 1));

        for (int i = start; i < end; i++) {
            DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]);
            dataWriter.append(new IntWritable(i), new VectorWritable(rowVector));
        }
        dataWriter.close();
    }
    return splittedFiles;
}
From source file: at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java
License: Apache License
/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply Matrix with transposed one
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file: at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java
License: Apache License
/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply Matrix
    if (job.waitForCompletion(true)) {

        // Rename result file to output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }

        // Read resulting Matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file: azkaban.jobtype.javautils.HadoopUtils.java
License: Apache License
public static void saveProps(FileSystem fs, Props props, String file) throws IOException {
    Path path = new Path(file);

    // create directory if it does not exist.
    Path parent = path.getParent();
    if (!fs.exists(parent))
        fs.mkdirs(parent);

    // write out properties
    OutputStream output = fs.create(path);
    try {
        props.storeFlattened(output);
    } finally {
        output.close();
    }
}
From source file: azkaban.viewer.hdfs.HdfsBrowserServlet.java
License: Apache License
private void getPathSegments(Path path, List<Path> paths, List<String> segments) {
    Path curr = path;
    while (curr.getParent() != null) {
        paths.add(curr);
        segments.add(curr.getName());
        curr = curr.getParent();
    }
    Collections.reverse(paths);
    Collections.reverse(segments);
}
From source file: azkaban.web.pages.HdfsBrowserServlet.java
License: Apache License
private void displayDir(HttpServletRequest req, HttpServletResponse resp, Path path) throws IOException {
    Page page = newPage(req, resp, "azkaban/web/pages/hdfs_browser_dir.vm");

    List<Path> paths = new ArrayList<Path>();
    List<String> segments = new ArrayList<String>();
    Path curr = path;
    while (curr.getParent() != null) {
        paths.add(curr);
        segments.add(curr.getName());
        curr = curr.getParent();
    }
    Collections.reverse(paths);
    Collections.reverse(segments);

    page.add("paths", paths);
    page.add("segments", segments);
    page.add("subdirs", _fs.listStatus(path));
    page.render();
}
From source file: babel.prep.datedcorpus.DatedLangFilesOutputFormat.java
License: Apache License
public RecordWriter<Text, Text> getBaseRecordWriter(final FileSystem fs, JobConf job, String name,
        final Progressable progress) throws IOException {
    final Path dumpFile = new Path(FileOutputFormat.getOutputPath(job), name);

    // Get the old copy out of the way
    if (fs.exists(dumpFile)) {
        fs.delete(dumpFile, true);
    } else {
        fs.mkdirs(dumpFile.getParent());
    }

    return new RecordWriter<Text, Text>() {
        public synchronized void write(Text key, Text versText) throws IOException {
            try {
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(new File(dumpFile.toUri()), true), DEFAULT_CHARSET));
                writer.write(versText.toString());
                writer.close();
            } catch (Exception e) {
                throw new RuntimeException("Error writing page versions: " + e.toString());
            }
        }

        public synchronized void close(Reporter reporter) throws IOException {
        }
    };
}
From source file: backup.namenode.NameNodeBackupBlockCheckProcessor.java
License: Apache License
public static boolean shouldIgnore(Set<Path> pathSetToIgnore, Path path) {
    do {
        if (pathSetToIgnore.contains(path)) {
            return true;
        }
        path = path.getParent();
    } while (path != null);
    return false;
}
From source file: boa.datagen.MapFileGen.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (SEQ_FILE_PATH.isEmpty()) {
        System.out.println("Missing path to sequence file. Please specify it in the properties file.");
        return;
    }

    String base = "hdfs://boa-njt/";
    Configuration conf = new Configuration();
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path(SEQ_FILE_PATH);
    String name = path.getName();
    if (fs.isFile(path)) {
        if (path.getName().equals(MapFile.DATA_FILE_NAME)) {
            MapFile.fix(fs, path.getParent(), Text.class, BytesWritable.class, false, conf);
        } else {
            Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
            fs.rename(path, dataFile);
            Path dir = new Path(path.getParent(), name);
            fs.mkdirs(dir);
            fs.rename(dataFile, new Path(dir, dataFile.getName()));
            MapFile.fix(fs, dir, Text.class, BytesWritable.class, false, conf);
        }
    } else {
        FileStatus[] files = fs.listStatus(path);
        for (FileStatus file : files) {
            path = file.getPath();
            if (fs.isFile(path)) {
                Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                fs.rename(path, dataFile);
                MapFile.fix(fs, dataFile.getParent(), Text.class, BytesWritable.class, false, conf);
                break;
            }
        }
    }
    fs.close();
}
From source file: cascading.tap.hadoop.Hadoop18TapUtil.java
License: Open Source License
/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);
    if (fs == null)
        return;

    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());
    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id");
    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);

    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;

        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();

            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - "
                        + taskOutputPath);

            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}