List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
From source file:azkaban.jobtype.javautils.HadoopUtils.java
License:Apache License
/**
 * Recursively walks {@code path} and registers every non-ignored file found
 * beneath it as an input path on {@code conf}.
 *
 * <p>Entries for which {@code shouldPathBeIgnored} returns true are skipped.
 * If {@code path} does not exist nothing is added.
 *
 * @param conf job configuration that receives the input paths
 * @param path root of the tree to scan
 * @return the same {@code conf} instance that was passed in
 * @throws IllegalArgumentException if {@code path} itself should be ignored
 * @throws IOException if the file system cannot be resolved or listed
 */
public static JobConf addAllSubPaths(JobConf conf, Path path) throws IOException {
    if (shouldPathBeIgnored(path)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path));
    }

    final FileSystem fileSystem = path.getFileSystem(conf);
    if (!fileSystem.exists(path)) {
        return conf;
    }

    for (FileStatus entry : fileSystem.listStatus(path)) {
        final Path child = entry.getPath();
        if (shouldPathBeIgnored(child)) {
            continue;
        }
        if (entry.isDir()) {
            // Descend into sub-directories; files are collected on the way down.
            addAllSubPaths(conf, child);
        } else {
            FileInputFormat.addInputPath(conf, child);
        }
    }
    return conf;
}
From source file:azkaban.jobtype.javautils.HadoopUtils.java
License:Apache License
/**
 * Saves {@code props} to {@code file}, resolving the target file system from
 * the path using a freshly created default {@link Configuration}.
 *
 * @param props properties to persist
 * @param file  destination path, as a string
 * @throws IOException if the file system cannot be resolved or the write fails
 */
public static void saveProps(Props props, String file) throws IOException {
    final FileSystem fileSystem = new Path(file).getFileSystem(new Configuration());
    saveProps(fileSystem, props, file);
}
From source file:azure.TweetUpload.java
License:Apache License
public static void main(String[] args) { try { String filePath = "hdfs://localhost.localdomain:8020/tmp/hive-mapred/" + args[0] + "/000000_0"; // File location Configuration configuration = new Configuration(); Path path = new Path(filePath); Path newFilePath = new Path("temp_" + args[0]); FileSystem fs = path.getFileSystem(configuration); /*from w ww . ja v a 2 s.co m*/ fs.copyToLocalFile(path, newFilePath); // Copy temporary to local directory CloudStorageAccount account = CloudStorageAccount .parse(storageConnectionString); CloudBlobClient serviceClient = account.createCloudBlobClient(); CloudBlobContainer container = serviceClient .getContainerReference("container_name_here"); // Container name (must be lower case) container.createIfNotExists(); // Upload file CloudBlockBlob blob = container .getBlockBlobReference("user/rdp_username_here/analysisFiles/" + args[0] + ".tsv"); File sourceFile = new File(newFilePath.toString()); blob.upload(new FileInputStream(sourceFile), sourceFile.length()); File tmpFile = new File(newFilePath.toString()); tmpFile.delete(); // Delete the temporary file // In case of errors } catch (Exception e) { System.exit(-1); } }
From source file:backup.store.ExternalExtendedBlockSort.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Path dir = new Path("file:///home/apm/Development/git-projects/hdfs-backup/hdfs-backup-core/tmp"); dir.getFileSystem(conf).delete(dir, true); long start = System.nanoTime(); try (ExternalExtendedBlockSort<LongWritable> sort = new ExternalExtendedBlockSort<>(conf, dir, LongWritable.class)) { Random random = new Random(); for (int bp = 0; bp < 1; bp++) { String bpid = UUID.randomUUID().toString(); for (int i = 0; i < 10000000; i++) { // for (int i = 0; i < 10; i++) { long genstamp = random.nextInt(20000); long blockId = random.nextLong(); ExtendedBlock extendedBlock = new ExtendedBlock(bpid, blockId, random.nextInt(Integer.MAX_VALUE), genstamp); sort.add(extendedBlock, new LongWritable(blockId)); }/*from w w w . j a v a 2s . co m*/ } System.out.println("finished"); sort.finished(); System.out.println("interate"); for (String blockPoolId : sort.getBlockPoolIds()) { ExtendedBlockEnum<LongWritable> blockEnum = sort.getBlockEnum(blockPoolId); ExtendedBlock block; long l = 0; while ((block = blockEnum.next()) != null) { // System.out.println(block); long blockId = block.getBlockId(); l += blockId; LongWritable currentValue = blockEnum.currentValue(); if (currentValue.get() != blockId) { System.err.println("Error " + blockId); } } System.out.println(l); } } long end = System.nanoTime(); System.out.println("Time [" + (end - start) / 1000000.0 + " ms]"); }
From source file:backup.store.ExternalExtendedBlockSort.java
License:Apache License
/**
 * Opens an enumeration over the output file of the given block pool.
 *
 * @param blockPoolId block pool whose output file should be read
 * @return an enumeration backed by a reader on the output file, or
 *         {@code null} when that file does not exist
 * @throws Exception if the file system lookup or opening the reader fails
 */
public ExtendedBlockEnum<T> getBlockEnum(String blockPoolId) throws Exception {
    final Path sortedFile = getOutputFilePath(blockPoolId);
    if (sortedFile.getFileSystem(conf).exists(sortedFile)) {
        return new BlockEnum(blockPoolId, new Reader(conf, Reader.file(sortedFile)));
    }
    return null;
}
From source file:backup.store.ExternalExtendedBlockSort.java
License:Apache License
/**
 * For every block pool that has a writer, sorts its input file into its
 * output file unless the output already exists or the input is missing.
 *
 * @throws IOException if a file system check or the sort itself fails
 */
private synchronized void sortIfNeeded() throws IOException {
    for (String poolId : writers.keySet()) {
        final Path sorted = getOutputFilePath(poolId);
        final Path unsorted = getInputFilePath(poolId);
        final FileSystem fs = sorted.getFileSystem(conf);

        // Nothing to do when the output is already there or there is no input to sort.
        if (fs.exists(sorted) || !fs.exists(unsorted)) {
            continue;
        }

        final LocalFileSystem localFs = FileSystem.getLocal(conf);
        final SequenceFile.Sorter fileSorter = new Sorter(localFs, ComparableBlock.class, dataClass, conf);
        fileSorter.sort(unsorted, sorted);
    }
}
From source file:bdss.cmu.edu.Sort.java
License:Apache License
/** * The main driver for sort program.//www .j av a 2 s. c om * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker. */ public int run(String[] args) throws Exception { Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = conf.get(REDUCES_PER_HOST); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = BytesWritable.class; List<String> otherArgs = new ArrayList<String>(); InputSampler.Sampler<K, V> sampler = null; for (int i = 0; i < args.length; ++i) { try { if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-totalOrder".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = Integer.parseInt(args[++i]); if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE; sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected 
instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs job = new Job(conf); job.setJobName("sorter"); job.setJarByClass(Sort.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); job.setNumReduceTasks(num_reduces); job.setInputFormatClass(inputFormatClass); job.setOutputFormatClass(outputFormatClass); job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); // Make sure there are exactly 2 parameters left. if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(job, otherArgs.get(0)); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1))); if (sampler != null) { System.out.println("Sampling input to effect total-order sort..."); job.setPartitionerClass(TotalOrderPartitioner.class); Path inputDir = FileInputFormat.getInputPaths(job)[0]; inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf)); Path partitionFile = new Path(inputDir, "_sortPartitioning"); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile); InputSampler.<K, V>writePartitionFile(job, sampler); URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, conf); } System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 
0 : 1; Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return ret; }
From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseReducer.java
License:Apache License
private void getBaseDirs(Context context) { try {// w w w . j av a 2 s . c om String dir = getJobAbsoluteOutputDir(context); baseDir = dir.isEmpty() ? "tmp" : dir; Path path = new Path(context.getConfiguration().get("mapred.output.dir")); FileSystem fs = path.getFileSystem(context.getConfiguration()); if (fs.getFileStatus(path) != null) { aprioriPhase = fs.getFileStatus(path).getPath().getName().split("-")[0].substring(2); } } catch (IOException e) { e.printStackTrace(); } }
From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseReducer.java
License:Apache License
private int getLargestIndex(Configuration conf, Path path, String prefix, int index) { int largestIx = -1; try {//from w ww. j a v a2 s . c om FileSystem fs = path.getFileSystem(conf); for (FileStatus file : fs.listStatus(path, new NameStartsWithFilter(prefix))) { largestIx = max(largestIx, parseInt(file.getPath().getName().split("-")[index])); } } catch (NumberFormatException e) { } catch (IOException e) { } return largestIx; }
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
/**
 * Tells whether prefix generation may start for the given phase.
 *
 * @param opt   options providing {@code outputDir} and {@code prefixLength}
 * @param phase current phase number
 * @return {@code true} when {@code phase} is at least {@code opt.prefixLength}
 *         and the directory {@code <outputDir>/tg<phase>} exists
 * @throws IOException if the file system cannot be resolved or queried
 */
private boolean canStartPrefixGeneration(FIMOptions opt, int phase) throws IOException {
    final Path candidate = new Path(opt.outputDir + separator + "tg" + phase);
    if (phase < opt.prefixLength) {
        return false;
    }
    return candidate.getFileSystem(new Configuration()).exists(candidate);
}