List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
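Before the source-file examples, a minimal self-contained sketch of the call itself (a hedged illustration: the default Configuration, the relative path "data/out", and the HDFS URI in the comment are assumptions for demonstration, not taken from any source file below). makeQualified resolves a path against the file system's scheme, authority, and working directory, which is why the examples below qualify paths before comparing, logging, or registering them.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Resolves scheme, authority, and working directory; on HDFS a relative
        // path such as "data/out" may become something like
        // "hdfs://namenode:8020/user/alice/data/out" (illustrative URI only).
        Path qualified = fs.makeQualified(new Path("data/out"));
        System.out.println(qualified);
    }
}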
From source file:com.ebay.erl.mobius.core.JobSetup.java
License:Apache License
private static void ensureOutputDelete(Path outputFolder, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    outputFolder = fs.makeQualified(outputFolder);
    if (fs.exists(outputFolder)) {
        LOGGER.info("Deleting " + outputFolder.toString());
        fs.delete(outputFolder, true);
    }
}
From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License:Apache License
private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, get samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new GZIPOutputStream(
                    new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write the partition file
        ////////////////////////////////////////////////
        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        float avgReduceSize = samples.length / reducersNbr;

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // Try to distribute the load evenly across reducers by dividing
            // <code>samples</code> into blocks separated by boundary elements
            // selected from the array, so that each block has roughly the same size.

            // Find the last index of the element equal to samples[i], as the same
            // element may appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);
            int lowerBound = i; // Util.findLowerBound(samples, samples[i], comparator);

            // If the key itself repeats too often, select it as a boundary on its own.
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // greater than two times the average reducer size
            {
                // The current element is too big (more than twice
                // <code>avgReduceSize</code>), so it becomes a boundary by itself.
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                //pw.println(samples[i]);

                // Immediately make the next element a boundary as well; the next
                // element starts at <code>upperBound + 1</code>, which prevents the
                // current one from consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());
                    //pw.println(samples[upperBound+1]);

                    // Move on to the element after <code>samples[upperBound + 1]</code>.
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // The current element is small enough to be considered together
                // with the previous group.
                int size = upperBound - lastBegin;

                if (size > avgReduceSize) {
                    // Including the current element, we have found a block that is
                    // big enough; select it as a boundary.
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                    //pw.println(samples[i]);

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // If the number of written samples does not equal the number of reducers
        // minus one, the key space is too small and TotalOrderPartitioner won't
        // work: it requires distinct partition boundaries.
        //
        // In that case, adjust the number of reducers.
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // Add 1 because the written samples define boundaries; e.g., with two
            // boundary elements [300, 1000] there should be 3 reducers: one for
            // i < 300, one for 300 <= i < 1000, and one for 1000 <= i.
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
From source file:com.explorys.apothecary.hbase.mr.inputformat.MergedStoreFileInputFormatTest.java
License:Apache License
protected HRegion createNewHRegion(HTableDescriptor desc, byte[] startKey, byte[] endKey) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = filesystem.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
    filesystem.mkdirs(rootdir);
    return HRegion.createHRegion(new HRegionInfo(desc, startKey, endKey), rootdir, conf);
}
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * Setup input files for the map reduce job.
 * @param jobconf configuration of the map reduce job
 * @param nmappers number of mappers (loader or requester)
 */
private static FileSystem setupInputFiles(JobConf jobconf, int nmappers)
        throws IOException, InterruptedException {
    // setup input/output directories
    final Path indir = new Path(TMP_DIR, "in");
    final Path outdir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobconf, indir);
    FileOutputFormat.setOutputPath(jobconf, outdir);

    final FileSystem fs = FileSystem.get(jobconf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(indir)) {
        throw new IOException("Cannot create input directory " + indir);
    }

    // generate an input file for each map task
    if (USE_INPUT_FILES) {
        for (int i = 0; i < nmappers; ++i) {
            final Path file = new Path(indir, "part" + i);
            final IntWritable mapperid = new IntWritable(i);
            final IntWritable nummappers = new IntWritable(nmappers);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobconf, file, IntWritable.class,
                    IntWritable.class, CompressionType.NONE);
            try {
                writer.append(mapperid, nummappers);
            } finally {
                writer.close();
            }
            logger.info("Wrote input for Map #" + i);
        }
    }
    return fs;
}
From source file:com.floodCtr.Util.java
License:Open Source License
public static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplog.java
License:Apache License
private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
    this.conf = fs.getConf();
    this.stats = stats;
    this.path = fs.makeQualified(path);
    this.hfd = new HoplogDescriptor(this.path.getName());
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java
License:Apache License
/**
 * Fixes the size of hoplogs that were not closed properly last time.
 * Such hoplogs are *.tmphop files. Identify them, open them, and close
 * them; this fixes the size. After doing this, rename them to *.hop.
 *
 * @throws IOException
 * @throws ForceReattemptException
 */
void identifyAndFixTmpHoplogs(FileSystem fs) throws IOException, ForceReattemptException {
    if (logger.isDebugEnabled())
        logger.debug("{}Fixing temporary hoplogs", logPrefix);

    // A different file system is passed to this function for the following reason:
    // For HDFS, if a file wasn't closed properly last time, calling FileSystem.append
    // on that file goes through FSNamesystem.startFileInternal ->
    // FSNamesystem.recoverLeaseInternal, which throws AlreadyBeingCreatedException
    // if there is an open handle to any other file created using the same FileSystem
    // object. This is a bug and is being tracked at:
    // https://issues.apache.org/jira/browse/HDFS-3848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
    //
    // The fix for this bug is not yet part of Pivotal HD. To work around it,
    // we create a new file system for the timer task so that it does not hit the bug.
    FileStatus tmpHoplogs[] = FSUtils.listStatus(fs, fs.makeQualified(bucketPath), new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All valid hoplog files must match the regex
            Matcher matcher = patternForTmpHoplog.matcher(file.getName());
            return matcher.matches();
        }
    });

    if (tmpHoplogs == null || tmpHoplogs.length == 0) {
        if (logger.isDebugEnabled())
            logger.debug("{}No files to fix", logPrefix);
        return;
    }

    // Ping secondaries so that, in case of split brain, no other vm has taken
    // over as primary. #50110.
    pingSecondaries();

    if (logger.isDebugEnabled())
        logger.debug("{}Files to fix " + tmpHoplogs.length, logPrefix);

    String currentHoplogName = null;
    // Get the current hoplog name; we need to ignore the current hoplog while fixing.
    if (currentHoplog != null) {
        currentHoplogName = currentHoplog.getFileName();
    }

    for (int i = 0; i < tmpHoplogs.length; i++) {
        // Skip directories
        if (tmpHoplogs[i].isDirectory()) {
            continue;
        }

        final Path p = tmpHoplogs[i].getPath();

        if (tmpHoplogs[i].getPath().getName().equals(currentHoplogName)) {
            if (logger.isDebugEnabled())
                logger.debug("Skipping current file: " + tmpHoplogs[i].getPath().getName(), logPrefix);
            continue;
        }

        SequenceFileHoplog hoplog = new SequenceFileHoplog(fs, p, stats);
        try {
            makeLegitimate(hoplog);
            logger.info(LocalizedMessage.create(LocalizedStrings.DEBUG, "Hoplog " + p + " was a temporary "
                    + "hoplog because the node managing it wasn't shutdown properly last time. Fixed the hoplog name."));
        } catch (IOException e) {
            logger.info(LocalizedMessage.create(LocalizedStrings.DEBUG, "Hoplog " + p + " is still a temporary "
                    + "hoplog because the node managing it wasn't shutdown properly last time. Failed to "
                    + "change the hoplog name because an exception was thrown while fixing it. " + e));
        }
    }
}
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.github.sadikovi.riff.FileWriter.java
License:Open Source License
/**
 * Create file writer for path.
 * Configuration is passed separately and not reused from `fs.getConf`. This is to be explicit
 * about separate configuration from most of the hadoop settings. Actual user-facing API will
 * allow providing configuration for both file system and internal options.
 * @param fs file system to use
 * @param conf configuration
 * @param path path to the header file, also used to create data path
 * @param td type description for rows
 * @param codec compression codec
 * @throws IOException
 * @throws FileAlreadyExistsException
 */
FileWriter(FileSystem fs, Configuration conf, Path path, TypeDescription td, CompressionCodec codec)
        throws IOException {
    this.fs = fs;
    this.filePath = fs.makeQualified(path);
    this.writePrepared = false;
    this.writeFinished = false;
    if (this.fs.exists(filePath)) {
        throw new FileAlreadyExistsException("Already exists: " + filePath);
    }
    // this assumes that subsequent rows are provided for this schema
    this.td = td;
    this.numRowsInStripe = Riff.Options.numRowsInStripe(conf);
    this.bufferSize = Riff.Options.power2BufferSize(conf);
    this.hdfsBufferSize = Riff.Options.hdfsBufferSize(conf);
    this.columnFilterEnabled = Riff.Options.columnFilterEnabled(conf);
    this.codec = codec;
    // current stripe stats and filters
    this.stripeStats = null;
    this.stripeFilters = null;
    // file properties, by default not initialized
    this.fileProperties = null;
}
From source file:com.hazelcast.yarn.YarnUtil.java
License:Open Source License
public static LocalResource createFileResource(Path file, FileSystem fs, LocalResourceType type)
        throws Exception {
    LocalResource resource = Records.newRecord(LocalResource.class);
    file = fs.makeQualified(file);
    FileStatus stat = fs.getFileStatus(file);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
    resource.setSize(stat.getLen());
    resource.setTimestamp(stat.getModificationTime());
    resource.setType(type);
    resource.setVisibility(LocalResourceVisibility.APPLICATION);
    return resource;
}