List of usage examples for org.apache.hadoop.fs.FileSystem#makeQualified
public Path makeQualified(Path path)
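makeQualified resolves a path against the file system's URI and current working directory, turning a relative or scheme-less path into a fully qualified one. The following is a minimal sketch of that behavior, not taken from any of the examples below; the class name and the sample output paths in the comments are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The local file system is used here so the sketch runs without a cluster;
        // against HDFS the same call would prepend hdfs://<namenode>:<port>.
        FileSystem fs = FileSystem.getLocal(conf);

        // A scheme-less, relative path...
        Path relative = new Path("data/input");
        // ...comes back qualified with the file system's scheme, authority,
        // and working directory, e.g. file:/home/alice/project/data/input
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}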
From source file:com.asakusafw.testdriver.temporary.TemporaryOutputRetriever.java
License:Apache License
@Override
public void truncate(TemporaryOutputDescription description, TestContext context) throws IOException {
    LOG.debug("Deleting output directory: {}", description); //$NON-NLS-1$
    VariableTable variables = createVariables(context);
    Configuration config = configurations.newInstance();
    FileSystem fs = FileSystem.get(config);
    String resolved = variables.parse(description.getPathPrefix(), false);
    Path path = new Path(resolved);
    Path output = path.getParent();
    Path target;
    if (output == null) {
        LOG.warn("Skipped deleting output directory because it is a base directory: {}", path);
        target = fs.makeQualified(path);
    } else {
        LOG.debug("Output directory will be deleted: {}", output); //$NON-NLS-1$
        target = fs.makeQualified(output);
    }
    TemporaryInputPreparator.delete(fs, target);
}
From source file:com.asakusafw.testdriver.testing.moderator.MockImporterPreparator.java
License:Apache License
@Override
public void truncate(MockImporterDescription description, TestContext context) throws IOException {
    Configuration config = configurations.newInstance();
    FileSystem fs = FileSystem.get(config);
    Path target = fs.makeQualified(new Path(description.getDirectory()));
    if (fs.exists(target)) {
        fs.delete(target, true);
    }
}
From source file:com.asakusafw.windgate.hadoopfs.HadoopFsProfile.java
License:Apache License
private static Path extractBasePath(Configuration configuration, ResourceProfile profile) throws IOException {
    assert configuration != null;
    assert profile != null;
    String result = extract(profile, KEY_BASE_PATH, false);
    try {
        if (result == null || result.isEmpty()) {
            FileSystem fileSystem = FileSystem.get(configuration);
            return fileSystem.getWorkingDirectory();
        }
        URI uri = URI.create(result);
        FileSystem fileSystem = FileSystem.get(uri, configuration);
        return fileSystem.makeQualified(new Path(uri));
    } catch (IOException e) {
        WGLOG.error(e, "E00002", profile.getName(), KEY_BASE_PATH, result == null ? "(default)" : result);
        throw new IOException(MessageFormat.format(
                "Failed to initialize the file system: {1} (resource={0})",
                profile.getName(), KEY_BASE_PATH, result == null ? "(default)" : result), e);
    }
}
From source file:com.asakusafw.workflow.hadoop.HadoopDelete.java
License:Apache License
private static void delete(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug("deleting file: {}", fs.makeQualified(path));
    }
    boolean deleted = fs.delete(path, true);
    if (LOG.isDebugEnabled()) {
        if (deleted) {
            LOG.debug("delete success: {}", fs.makeQualified(path));
        } else if (fs.exists(path)) {
            LOG.debug("delete failed: {}", fs.makeQualified(path));
        } else {
            LOG.debug("target file is not found: {}", fs.makeQualified(path));
        }
    }
}
From source file:com.bah.lucene.hdfs.SoftlinkHdfsDirectory.java
License:Apache License
/**
 * Creates a new SoftlinkHdfsDirectory.
 *
 * @param configuration
 *          the {@link Configuration} object.
 * @param storePath
 *          the path where the data is actually stored.
 * @param linkPath
 *          the path where the links are stored.
 * @throws IOException
 */
public SoftlinkHdfsDirectory(Configuration configuration, Path storePath, Path linkPath) throws IOException {
    super(configuration, linkPath);
    FileSystem fileSystem = storePath.getFileSystem(configuration);
    _storePath = fileSystem.makeQualified(storePath);
    _linkPath = fileSystem.makeQualified(linkPath);
}
From source file:com.benchmark.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);
    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    //final Path inDir = new Path(TMP_DIR, "in");
    final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
    System.out.println("inDir =" + inDir.toString());
    //final Path outDir = new Path(TMP_DIR, "out");
    final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
    System.out.println("outDir =" + outDir.toString());
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/**
 * Serializes the fList and returns the string representation of the List
 */
public static void saveFList(Iterable<Pair<String, Long>> flist, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT), F_LIST);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, LongWritable.class);
    try {
        for (Pair<String, Long> pair : flist) {
            writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Serializes the fList and returns the string representation of the List
 */
public static void saveFList(List<Pair<String, Long>> fList, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT) + "/oldlist", F_LIST);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, Pair.class);
    // set param to control group size in MR jobs
    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
    try {
        int group = 0;
        int count = 0;
        for (Pair<String, Long> pair : fList) {
            if (count == maxPerGroup) {
                group++;
                count = 0;
            }
            writer.append(new Text(pair.getFirst()), new Pair<Integer, Long>(group, pair.getSecond()));
            //writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}
From source file:com.chinamobile.bcbsp.client.BSPJobClient.java
License:Apache License
/**
 * Submit a new job to run.
 * @param job BSPJob
 * @return Review comments: (1)The content of submitJobDir is decided by the
 *         client. I think it is dangerous because two different clients maybe
 *         generate the same submitJobDir. Review time: 2011-11-30; Reviewer:
 *         Hongxu Zhang. Fix log: (1)In order to avoid the conflict, I use the
 *         jobId to generate the submitJobDir. Because the jobId is unique so
 *         this problem can be solved. Fix time: 2011-12-04; Programmer:
 *         Zhigang Wang. Review comments: (2)There, the client must submit
 *         relative information about the job. There maybe some exceptions
 *         during this process. When exceptions occur, this job should not be
 *         executed and the relative submitJobDir must be cleanup. Review
 *         time: 2011-12-04; Reviewer: Hongxu Zhang. Fix log: (2)The process
 *         of submiting files has been surrounded by try-catch. The
 *         submitJobDir will be cleanup in the catch process. Fix time:
 *         2011-12-04; Programmer: Zhigang Wang.
 */
public RunningJob submitJobInternal(BSPJob job) {
    BSPJobID jobId = null;
    Path submitJobDir = null;
    try {
        jobId = jobSubmitClient.getNewJobId();
        submitJobDir = new Path(getSystemDir(), "submit_" + jobId.toString());
        Path submitJarFile = null;
        LOG.info("debug: job type is " + job.getJobType());
        if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
            submitJarFile = new Path(submitJobDir, "jobC");
            LOG.info("debug:" + submitJarFile.toString());
        } else {
            LOG.info("debug: before submitJarFile = new " + "Path(submitJobDir,job.jar);");
            submitJarFile = new Path(submitJobDir, "job.jar");
            LOG.info("debug:" + submitJarFile.toString());
        }
        Path submitJobFile = new Path(submitJobDir, "job.xml");
        Path submitSplitFile = new Path(submitJobDir, "job.split");
        // set this user's id in job configuration, so later job files can
        // be accessed using this user's id
        UnixUserGroupInformation ugi = getUGI(job.getConf());
        // Create a number of filenames in the BSPController's fs namespace
        FileSystem files = getFs();
        files.delete(submitJobDir, true);
        submitJobDir = files.makeQualified(submitJobDir);
        submitJobDir = new Path(submitJobDir.toUri().getPath());
        BSPFsPermission bspSysPerms = new BSPFspermissionImpl(2);
        FileSystem.mkdirs(files, submitJobDir, bspSysPerms.getFp());
        files.mkdirs(submitJobDir);
        short replication = (short) job.getInt("bsp.submit.replication", 10);
        String originalJarPath = null;
        LOG.info("debug: job type is " + job.getJobType());
        if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(job.getJobType())) {
            LOG.info("debug: originalJarPath = job.getJobExe();" + job.getJobExe());
            originalJarPath = job.getJobExe();
            LOG.info("debug:" + submitJarFile.toString());
            job.setJobExe(submitJarFile.toString());
        } else {
            LOG.info("debug: jar");
            originalJarPath = job.getJar();
            job.setJar(submitJarFile.toString());
        }
        if (originalJarPath != null) {
            // copy jar to BSPController's fs
            // use jar name if job is not named.
            if ("".equals(job.getJobName())) {
                job.setJobName(new Path(originalJarPath).getName());
            }
            // job.setJar(submitJarFile.toString());
            fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
            fs.setReplication(submitJarFile, replication);
            fs.setPermission(submitJarFile, new BSPFspermissionImpl(0).getFp());
        } else {
            LOG.warn("No job jar file set. User classes may not be found. "
                    + "See BSPJob#setJar(String) or check Your jar file.");
        }
        // Set the user's name and working directory
        job.setUser(ugi.getUserName());
        if (ugi.getGroupNames().length > 0) {
            job.set("group.name", ugi.getGroupNames()[0]);
        }
        if (new BSPHdfsImpl().getWorkingDirectory() == null) {
            job.setWorkingDirectory(fs.getWorkingDirectory());
        }
        int maxClusterStaffs = jobSubmitClient.getClusterStatus(false).getMaxClusterStaffs();
        if (job.getNumPartition() == 0) {
            job.setNumPartition(maxClusterStaffs);
        }
        if (job.getNumPartition() > maxClusterStaffs) {
            job.setNumPartition(maxClusterStaffs);
        }
        job.setNumBspStaff(job.getNumPartition());
        int splitNum = 0;
        splitNum = writeSplits(job, submitSplitFile);
        if (splitNum > job.getNumPartition() && splitNum <= maxClusterStaffs) {
            job.setNumPartition(splitNum);
            job.setNumBspStaff(job.getNumPartition());
        }
        if (splitNum > maxClusterStaffs) {
            LOG.error("Sorry, the number of files is more than maxClusterStaffs:" + maxClusterStaffs);
            throw new IOException("Could not launch job");
        }
        job.set(Constants.USER_BC_BSP_JOB_SPLIT_FILE, submitSplitFile.toString());
        LOG.info("[Max Staff Number] " + maxClusterStaffs);
        LOG.info("The number of splits for the job is: " + splitNum);
        LOG.info("The number of staffs for the job is: " + job.getNumBspStaff());
        BSPFSDataOutputStream bspout = new BSPFSDataOutputStreamImpl(fs, submitJobFile,
                new BSPFspermissionImpl(0).getFp());
        try {
            job.writeXml(bspout.getOut());
        } finally {
            bspout.close();
        }
        // Now, actually submit the job (using the submit name)
        JobStatus status = jobSubmitClient.submitJob(jobId, submitJobFile.toString());
        if (status != null) {
            return new NetworkedJob(status);
        } else {
            throw new IOException("Could not launch job");
        }
    } catch (FileNotFoundException fnfE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", fnfE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.INDETERMINATE, "null", fnfE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (ClassNotFoundException cnfE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", cnfE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.WARNING, "null", cnfE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (InterruptedException iE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", iE);
        Fault f = new Fault(Fault.Type.SYSTEMSERVICE, Fault.Level.CRITICAL, "null", iE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    } catch (Exception ioE) {
        LOG.error("Exception has been catched in BSPJobClient--submitJobInternal !", ioE);
        Fault f = new Fault(Fault.Type.DISK, Fault.Level.CRITICAL, "null", ioE.toString());
        jobSubmitClient.recordFault(f);
        jobSubmitClient.recovery(jobId);
        try {
            FileSystem files = getFs();
            files.delete(submitJobDir, true);
        } catch (IOException e) {
            //LOG.error("Failed to cleanup the submitJobDir:" + submitJobDir);
            throw new RuntimeException("Failed to cleanup the submitJobDir", e);
        }
        return null;
    }
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning
 * against <code>splitPoints</code>. Cleans up the partitions file after the
 * job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {
    // create the partitions file
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
    fs.makeQualified(partitionsPath);
    fs.deleteOnExit(partitionsPath);
    writePartitions(job.getConfiguration(), partitionsPath, splitPoints);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
}
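A side note on the call above: FileSystem.makeQualified returns a new, qualified Path rather than modifying its argument, so the result usually needs to be captured. A minimal sketch of that pattern follows; the helper class and method names are illustrative and not part of the source above.

import java.io.IOException;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class QualifiedPartitionsPathSketch {
    static Path qualifiedPartitionsPath(Configuration conf) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path partitionsPath = new Path("/tmp", "partitions_" + UUID.randomUUID());
        // makeQualified returns a qualified copy, so reassign (or return) the result
        // to keep the scheme and authority it adds.
        return fs.makeQualified(partitionsPath);
    }
}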