List of usage examples for org.apache.hadoop.fs Path makeQualified
@Deprecated
public Path makeQualified(FileSystem fs)
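This overload qualifies a path against the given file system's URI and working directory, but it is deprecated; FileSystem#makeQualified(Path) and Path#makeQualified(URI, Path) are the non-deprecated equivalents. A minimal sketch of all three forms (the class name MakeQualifiedExample and the relative path "data/input" are arbitrary placeholders, not from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf); // default FS, e.g. file:/// or hdfs://...
        Path relative = new Path("data/input");

        // Deprecated overload shown on this page: resolves against the
        // file system's URI and working directory.
        Path q1 = relative.makeQualified(fs);

        // Non-deprecated equivalents.
        Path q2 = fs.makeQualified(relative);
        Path q3 = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());

        // All three print the same fully qualified path, e.g.
        // file:/home/user/data/input on a local default file system.
        System.out.println(q1);
        System.out.println(q2);
        System.out.println(q3);
    }
}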
From source file:com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License:Apache License
private FileStatus newFile(FileMetadata meta, Path path) {
    return new FileStatus(meta.getLength(), false, 1, MAX_OSS_FILE_SIZE,
            meta.getLastModified(), path.makeQualified(this));
}
From source file:com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License:Apache License
private FileStatus newDirectory(Path path) {
    return new FileStatus(0, true, 1, MAX_OSS_FILE_SIZE, 0, path.makeQualified(this));
}
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceProfile.java
License:Apache License
/**
 * Converts the {@link DirectDataSourceProfile} into this profile.
 * @param profile target profile
 * @param conf Hadoop configuration
 * @return the converted profile
 * @throws IOException if failed to convert
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static HadoopDataSourceProfile convert(DirectDataSourceProfile profile, Configuration conf)
        throws IOException {
    if (profile == null) {
        throw new IllegalArgumentException("profile must not be null"); //$NON-NLS-1$
    }
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    Map<String, String> attributes = new HashMap<>(profile.getAttributes());
    Path fsPath = takeFsPath(profile, attributes, conf);
    if (fsPath == null) {
        throw new IOException(MessageFormat.format(
                "The directio configuration \"{0} ({1})\" does not have \"{2}\"",
                profile.getId(),
                profile.getPath().isEmpty() ? ROOT_REPRESENTATION : profile.getPath(),
                fqn(profile, KEY_PATH)));
    }
    Path tempPath = takeTempPath(profile, attributes, conf, fsPath);
    FileSystem fileSystem = fsPath.getFileSystem(conf);
    FileSystem tempFs = tempPath.getFileSystem(conf);
    if (getFsIdentity(fileSystem).equals(getFsIdentity(tempFs)) == false) {
        throw new IOException(MessageFormat.format(
                "The directio target and temporary path must be on same file system ({0}={1} <=> {2}={3})",
                fqn(profile, KEY_PATH), fsPath,
                fqn(profile, KEY_TEMP), tempPath));
    }
    fsPath = fsPath.makeQualified(fileSystem);
    tempPath = tempPath.makeQualified(fileSystem);
    HadoopDataSourceProfile result = new HadoopDataSourceProfile(
            conf, profile.getId(), profile.getPath(), fsPath, tempPath);
    long minFragment = takeMinFragment(profile, attributes, conf);
    result.setMinimumFragmentSize(minFragment);
    long prefFragment = takePrefFragment(profile, attributes, conf);
    result.setPreferredFragmentSize(prefFragment);
    result.setOutputStaging(takeBoolean(profile, attributes, KEY_OUTPUT_STAGING, DEFAULT_OUTPUT_STAGING));
    result.setOutputStreaming(takeBoolean(profile, attributes, KEY_OUTPUT_STREAMING, DEFAULT_OUTPUT_STREAMING));
    result.setSplitBlocks(takeBoolean(profile, attributes, KEY_SPLIT_BLOCKS, DEFAULT_SPLIT_BLOCKS));
    result.setCombineBlocks(takeBoolean(profile, attributes, KEY_COMBINE_BLOCKS, DEFAULT_COMBINE_BLOCKS));
    result.setKeepAliveInterval(takeKeepAliveInterval(profile, attributes, conf));
    if (attributes.isEmpty() == false) {
        throw new IOException(MessageFormat.format(
                "Unknown attributes in \"{0}\": {1}",
                profile.getId(),
                new TreeSet<>(attributes.keySet())));
    }
    return result;
}
From source file:com.benchmark.mapred.Sort.java
License:Apache License
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException when there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);
    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: "
                + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers()
            + " nodes to sort from " + FileInputFormat.getInputPaths(jobConf)[0]
            + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took "
            + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.benchmark.mapred.terasort.TeraSort.java
License:Apache License
public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();
    if (args.length != 3) {
        System.out.println("ERROR: Wrong number of parameters: "
                + args.length + " instead of 3.");
        // Bail out before touching args; the original fell through here
        // and would fail later with an ArrayIndexOutOfBoundsException.
        return -1;
    }
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString()
            + "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);
    Date startIteration = new Date();
    JobClient.runJob(job);
    Date endIteration = new Date();
    System.out.println("The iteration took "
            + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds.");
    LOG.info("done");
    return 0;
}
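As the argument handling above shows, the driver expects exactly three parameters: the input directory (args[0]), the output directory (args[1]), and the number of reduce tasks (args[2]); the qualified input directory is used only to build the partition file that TotalOrderPartitioner reads from the DistributedCache.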
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public FileStatus[] listStatus(Path f) throws IOException {
    Path absolutePath = makeAbsolute(f);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
        throw new FileNotFoundException("File " + f + " does not exist.");
    }
    if (inode.isFile()) {
        return new FileStatus[] { new RadosFileStatus(f.makeQualified(this), inode) };
    }
    ArrayList<FileStatus> ret = new ArrayList<FileStatus>();
    for (Path p : store.listSubPaths(absolutePath)) {
        ret.add(getFileStatus(p.makeQualified(this)));
    }
    return ret.toArray(new FileStatus[0]);
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
/**
 * FileStatus for the Rados file system.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
    INode inode = store.retrieveINode(makeAbsolute(f));
    if (inode == null) {
        throw new FileNotFoundException(f + ": No such file or directory.");
    }
    return new RadosFileStatus(f.makeQualified(this), inode);
}
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/**
 * Add a {@link Path} to the list of inputs for the BC_BSP job.
 *
 * @param job
 *        the current job BSPJob.
 * @param path
 *        {@link Path} to be added to the list of inputs for the BC_BSP job.
 */
public static void addInputPath(BSPJob job, Path path) throws IOException {
    Configuration conf = job.getConf();
    FileSystem fs = FileSystem.get(conf);
    path = path.makeQualified(fs);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get(Constants.USER_BC_BSP_JOB_INPUT_DIR);
    conf.set(Constants.USER_BC_BSP_JOB_INPUT_DIR,
            dirs == null ? dirStr : dirs + "," + dirStr);
}
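Each call qualifies the path against the default file system, escapes it, and appends it to the comma-separated list stored under Constants.USER_BC_BSP_JOB_INPUT_DIR, so repeated calls accumulate multiple input directories for the job.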
From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java
License:Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match ZHFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *       ZPutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(ZHFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(ZPutSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
            + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(),
            "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    // Path is immutable: makeQualified returns a new Path, so the result
    // must be assigned back (the original code discarded it, leaving
    // partitionsPath unqualified when building cacheUri below).
    partitionsPath = partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        cacheUri = new URI(partitionsPath.toString()
                + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    LOG.info("Incremental table output configured.");
}
From source file:com.cloudera.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with --class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the
        // classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class "
                        + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);

        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);
        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}