List of usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, int n, int maxVal) throws IOException { FileSystem fs = inputPath.getFileSystem(conf); // Create input file writers depending on bspTaskNum int bspTaskNum = conf.getInt("bsp.peers.num", 1); SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum]; for (int i = 0; i < bspTaskNum; i++) { Path inputFile = new Path(inputPath, "input" + i + ".seq"); LOG.info("inputFile: " + inputFile.toString()); inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, IntWritable.class, CompressionType.NONE);/* w w w . j a v a 2s . com*/ } // Write random values to input files IntWritable key = new IntWritable(); IntWritable value = new IntWritable(); Random r = new Random(); for (int i = 0; i < n; i++) { key.set(i); value.set(r.nextInt(maxVal)); for (int j = 0; j < inputWriters.length; j++) { inputWriters[j].append(key, value); } } // Close file writers for (int j = 0; j < inputWriters.length; j++) { inputWriters[j].close(); } }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.cpu.MatrixMultiplicationBSPCpu.java
License:Apache License
public static BSPJob createMatrixMultiplicationBSPCpuConf(Configuration conf, Path aPath, Path bPath, Path outPath) throws IOException { BSPJob job = new BSPJob(new HamaConfiguration(conf)); // Set the job name job.setJobName("MatrixMultiplicationBSP CPU"); // set the BSP class which shall be executed job.setBspClass(MatrixMultiplicationBSPCpu.class); // help Hama to locale the jar to be distributed job.setJarByClass(MatrixMultiplicationBSPCpu.class); job.setInputFormat(SequenceFileInputFormat.class); job.setInputPath(aPath);// ww w . jav a2 s . c o m job.setOutputFormat(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PipesVectorWritable.class); job.setOutputPath(outPath); job.set(CONF_MATRIX_MULT_B_PATH, bPath.toString()); job.set("bsp.child.java.opts", "-Xmx4G"); // Order message by row index job.set(MessageManager.RECEIVE_QUEUE_TYPE_CLASS, "org.apache.hama.bsp.message.queue.SortedMemoryQueue"); LOG.info("DEBUG: NumBspTask: " + job.getNumBspTask()); // "bsp.peers.num" LOG.info("DEBUG: bsp.job.split.file: " + job.get("bsp.job.split.file")); LOG.info("DEBUG: bsp.tasks.maximum: " + job.get("bsp.tasks.maximum")); LOG.info("DEBUG: bsp.input.dir: " + job.get("bsp.input.dir")); LOG.info("DEBUG: bsp.join.expr: " + job.get("bsp.join.expr")); return job; }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationBSPGpu.java
License:Apache License
public static BSPJob createMatrixMultiplicationBSPGpuConf(Configuration conf, Path aPath, Path bPath, Path outPath) throws IOException { BSPJob job = new BSPJob(new HamaConfiguration(conf)); // Set the job name job.setJobName("MatrixMultiplicationBSP GPU"); // set the BSP class which shall be executed job.setBspClass(MatrixMultiplicationBSPGpu.class); // help Hama to locale the jar to be distributed job.setJarByClass(MatrixMultiplicationBSPGpu.class); job.setInputFormat(SequenceFileInputFormat.class); job.setInputPath(aPath);/*from www . jav a 2 s . co m*/ job.setOutputFormat(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setOutputPath(outPath); job.set(CONF_MATRIX_MULT_B_PATH, bPath.toString()); job.set("bsp.child.java.opts", "-Xmx4G"); // Order message by row index // job.set(MessageManager.TRANSFER_QUEUE_TYPE_CLASS, // "org.apache.hama.bsp.message.queue.SortedMemoryQueueTransfer"); LOG.info("DEBUG: NumBspTask: " + job.getNumBspTask()); LOG.info("DEBUG: bsp.job.split.file: " + job.get("bsp.job.split.file")); LOG.info("DEBUG: bsp.tasks.maximum: " + job.get("bsp.tasks.maximum")); LOG.info("DEBUG: bsp.input.dir: " + job.get("bsp.input.dir")); return job; }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationBSPGpuNew.java
License:Apache License
public static BSPJob createMatrixMultiplicationBSPGpuConf(Configuration conf, Path aPath, Path bPath, Path outPath) throws IOException { BSPJob job = new BSPJob(new HamaConfiguration(conf)); // Set the job name job.setJobName("MatrixMultiplicationBSP GPU"); // set the BSP class which shall be executed job.setBspClass(MatrixMultiplicationBSPGpuNew.class); // help Hama to locale the jar to be distributed job.setJarByClass(MatrixMultiplicationBSPGpuNew.class); job.setInputFormat(SequenceFileInputFormat.class); job.setInputPath(aPath);/*from w w w .j a v a2s . c o m*/ job.setOutputFormat(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); job.setOutputPath(outPath); job.set(CONF_MATRIX_MULT_B_PATH, bPath.toString()); job.set("bsp.child.java.opts", "-Xmx4G"); // Order message by row index // job.set(MessageManager.TRANSFER_QUEUE_TYPE_CLASS, // "org.apache.hama.bsp.message.queue.SortedMemoryQueueTransfer"); LOG.info("DEBUG: NumBspTask: " + job.getNumBspTask()); LOG.info("DEBUG: bsp.job.split.file: " + job.get("bsp.job.split.file")); LOG.info("DEBUG: bsp.tasks.maximum: " + job.get("bsp.tasks.maximum")); LOG.info("DEBUG: bsp.input.dir: " + job.get("bsp.input.dir")); return job; }
From source file:awshamondsidefunctions.CopyToS3.java
License:Apache License
public static void copyToS3(String outPut) throws IOException, InterruptedException { String userName = HadoopUser.getHadoopUser(); //get output file name String outputName = new Path(outPut).getName(); Path outputPath = new Path(outPut).getParent(); //mapreduce single output file back to s3 using user specified output file name String copyCommand[] = { "hadoop", "distcp", "/user/" + userName + "/Hamond/" + outputName, outputPath.toString() }; Process p = Runtime.getRuntime().exec(copyCommand); p.waitFor();//from w w w .ja v a 2 s. co m }
From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java
License:Apache License
private void handleFsDisplay(String user, HttpServletRequest req, HttpServletResponse resp, Session session) throws IOException, ServletException, IllegalArgumentException, IllegalStateException { FileSystem fs = null;//from www . j a v a2 s .c om try { fs = getFileSystem(user); } catch (HadoopSecurityManagerException e) { errorPage(user, req, resp, session, "Cannot get FileSystem."); return; } Path path = getPath(req); if (logger.isDebugEnabled()) { logger.debug("path: '" + path.toString() + "'"); } try { if (!fs.exists(path)) { errorPage(user, req, resp, session, path.toUri().getPath() + " does not exist."); fs.close(); return; } } catch (IOException ioe) { logger.error("Got exception while checking for existence of path '" + path + "'", ioe); errorPage(user, req, resp, session, path.toUri().getPath() + " Encountered error while trying to detect if path '" + path + "' exists. Reason: " + ioe.getMessage()); fs.close(); return; } if (fs.isFile(path)) { displayFilePage(fs, user, req, resp, session, path); } else if (fs.getFileStatus(path).isDir()) { displayDirPage(fs, user, req, resp, session, path); } else { errorPage(user, req, resp, session, "It exists, it is not a file, and it is not a directory, what " + "is it precious?"); } fs.close(); }
From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java
License:Apache License
private void displayFilePage(FileSystem fs, String user, HttpServletRequest req, HttpServletResponse resp, Session session, Path path) { Page page = newPage(req, resp, session, "azkaban/viewer/hdfs/velocity/hdfs-file.vm"); List<Path> paths = new ArrayList<Path>(); List<String> segments = new ArrayList<String>(); getPathSegments(path, paths, segments); page.add("allowproxy", allowGroupProxy); page.add("viewerPath", viewerPath); page.add("viewerName", viewerName); page.add("paths", paths); page.add("segments", segments); page.add("user", user); page.add("path", path.toString()); page.add("homedir", getHomeDir(fs)); try {//from ww w. ja va 2 s .c o m boolean hasSchema = false; int viewerId = -1; for (int i = 0; i < viewers.size(); ++i) { HdfsFileViewer viewer = viewers.get(i); Set<Capability> capabilities = EnumSet.noneOf(Capability.class); capabilities = viewer.getCapabilities(fs, path); if (capabilities.contains(Capability.READ)) { if (capabilities.contains(Capability.SCHEMA)) { hasSchema = true; } viewerId = i; break; } } page.add("viewerId", viewerId); page.add("hasSchema", hasSchema); FileStatus status = fs.getFileStatus(path); page.add("status", status); } catch (Exception ex) { page.add("no_fs", "true"); page.add("error_message", "Error: " + ex.getMessage()); } page.render(); }
From source file:azure.TweetUpload.java
License:Apache License
public static void main(String[] args) { try { String filePath = "hdfs://localhost.localdomain:8020/tmp/hive-mapred/" + args[0] + "/000000_0"; // File location Configuration configuration = new Configuration(); Path path = new Path(filePath); Path newFilePath = new Path("temp_" + args[0]); FileSystem fs = path.getFileSystem(configuration); /* ww w .j a va 2 s. c o m*/ fs.copyToLocalFile(path, newFilePath); // Copy temporary to local directory CloudStorageAccount account = CloudStorageAccount .parse(storageConnectionString); CloudBlobClient serviceClient = account.createCloudBlobClient(); CloudBlobContainer container = serviceClient .getContainerReference("container_name_here"); // Container name (must be lower case) container.createIfNotExists(); // Upload file CloudBlockBlob blob = container .getBlockBlobReference("user/rdp_username_here/analysisFiles/" + args[0] + ".tsv"); File sourceFile = new File(newFilePath.toString()); blob.upload(new FileInputStream(sourceFile), sourceFile.length()); File tmpFile = new File(newFilePath.toString()); tmpFile.delete(); // Delete the temporary file // In case of errors } catch (Exception e) { System.exit(-1); } }
From source file:babel.prep.PrepStep.java
License:Apache License
/**
 * Points the job's {@code hadoop.tmp.dir} at a randomly named sub-directory.
 *
 * <p>The base directory is the {@code hadoop.tmp.dir} value of this object's configuration, or
 * {@code "."} when that setting is absent. A random UUID is appended as the sub-directory name.
 *
 * @param job job configuration that receives the new {@code hadoop.tmp.dir} value
 */
protected void setUniqueTempDir(JobConf job) {
    final String baseDir = getConf().get("hadoop.tmp.dir", ".");
    final String uniqueName = java.util.UUID.randomUUID().toString();

    final Path tempDir = new Path(baseDir + "/" + uniqueName);
    job.set("hadoop.tmp.dir", tempDir.toString());
}
From source file:bdss.cmu.edu.Sort.java
License:Apache License
/** * The main driver for sort program./* www . j a v a2 s .com*/ * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker. */ public int run(String[] args) throws Exception { Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = conf.get(REDUCES_PER_HOST); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = BytesWritable.class; List<String> otherArgs = new ArrayList<String>(); InputSampler.Sampler<K, V> sampler = null; for (int i = 0; i < args.length; ++i) { try { if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-totalOrder".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = Integer.parseInt(args[++i]); if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE; sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected 
instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs job = new Job(conf); job.setJobName("sorter"); job.setJarByClass(Sort.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); job.setNumReduceTasks(num_reduces); job.setInputFormatClass(inputFormatClass); job.setOutputFormatClass(outputFormatClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); // Make sure there are exactly 2 parameters left. if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(job, otherArgs.get(0)); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1))); if (sampler != null) { System.out.println("Sampling input to effect total-order sort..."); job.setPartitionerClass(TotalOrderPartitioner.class); Path inputDir = FileInputFormat.getInputPaths(job)[0]; inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf)); Path partitionFile = new Path(inputDir, "_sortPartitioning"); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile); InputSampler.<K, V>writePartitionFile(job, sampler); URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, conf); } System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 
0 : 1; Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return ret; }