List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
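Before the source-file examples, a minimal self-contained sketch of the call itself (a hedged illustration: the default Configuration, the relative path "data/out", and the HDFS URI in the comment are assumptions for demonstration, not taken from any source file below). makeQualified resolves a path against the file system's scheme, authority, and working directory, which is why the examples below qualify paths before comparing, logging, or registering them.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Resolves scheme, authority, and working directory; on HDFS a relative
        // path such as "data/out" may become something like
        // "hdfs://namenode:8020/user/alice/data/out" (illustrative URI only).
        Path qualified = fs.makeQualified(new Path("data/out"));
        System.out.println(qualified);
    }
}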
From source file:com.ebay.erl.mobius.core.JobSetup.java
License:Apache License
private static void ensureOutputDelete(Path outputFolder, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    outputFolder = fs.makeQualified(outputFolder);
    if (fs.exists(outputFolder)) {
        LOGGER.info("Deleting " + outputFolder.toString());
        fs.delete(outputFolder, true);
    }
}
From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License:Apache License
private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, get samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new GZIPOutputStream(
                    new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write the partition file
        ////////////////////////////////////////////////
        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        float avgReduceSize = samples.length / reducersNbr;

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // Try to distribute the load evenly across reducers by dividing
            // <code>samples</code> into blocks separated by boundary elements
            // selected from the array, so that each block has roughly the same size.

            // Find the last index of the element equal to samples[i], as the same
            // element may appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);
            int lowerBound = i; // Util.findLowerBound(samples, samples[i], comparator);

            // If the key itself repeats too often, select it as a boundary on its own.
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // greater than two times the average reducer size
            {
                // The current element is too big (more than twice
                // <code>avgReduceSize</code>), so it becomes a boundary by itself.
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                //pw.println(samples[i]);

                // Immediately make the next element a boundary as well; the next
                // element starts at <code>upperBound + 1</code>, which prevents the
                // current one from consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());
                    //pw.println(samples[upperBound+1]);

                    // Move on to the element after <code>samples[upperBound + 1]</code>.
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // The current element is small enough to be considered together
                // with the previous group.
                int size = upperBound - lastBegin;

                if (size > avgReduceSize) {
                    // Including the current element, we have found a block that is
                    // big enough; select it as a boundary.
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                    //pw.println(samples[i]);

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // If the number of written samples does not equal the number of reducers
        // minus one, the key space is too small and TotalOrderPartitioner won't
        // work: it requires distinct partition boundaries.
        //
        // In that case, adjust the number of reducers.
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // Add 1 because the written samples define boundaries; e.g., with two
            // boundary elements [300, 1000] there should be 3 reducers: one for
            // i < 300, one for 300 <= i < 1000, and one for 1000 <= i.
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
From source file:com.explorys.apothecary.hbase.mr.inputformat.MergedStoreFileInputFormatTest.java
License:Apache License
protected HRegion createNewHRegion(HTableDescriptor desc, byte[] startKey, byte[] endKey) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = filesystem.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
    filesystem.mkdirs(rootdir);
    return HRegion.createHRegion(new HRegionInfo(desc, startKey, endKey), rootdir, conf);
}
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * Setup input files for the map reduce job.
 * @param jobconf configuration of the map reduce job
 * @param nmappers number of mappers (loader or requester)
 */
private static FileSystem setupInputFiles(JobConf jobconf, int nmappers)
        throws IOException, InterruptedException {
    // setup input/output directories
    final Path indir = new Path(TMP_DIR, "in");
    final Path outdir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobconf, indir);
    FileOutputFormat.setOutputPath(jobconf, outdir);

    final FileSystem fs = FileSystem.get(jobconf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(indir)) {
        throw new IOException("Cannot create input directory " + indir);
    }

    // generate an input file for each map task
    if (USE_INPUT_FILES) {
        for (int i = 0; i < nmappers; ++i) {
            final Path file = new Path(indir, "part" + i);
            final IntWritable mapperid = new IntWritable(i);
            final IntWritable nummappers = new IntWritable(nmappers);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobconf, file, IntWritable.class,
                    IntWritable.class, CompressionType.NONE);
            try {
                writer.append(mapperid, nummappers);
            } finally {
                writer.close();
            }
            logger.info("Wrote input for Map #" + i);
        }
    }
    return fs;
}
From source file:com.floodCtr.Util.java
License:Open Source License
public static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplog.java
License:Apache License
private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
    this.conf = fs.getConf();
    this.stats = stats;
    this.path = fs.makeQualified(path);
    this.hfd = new HoplogDescriptor(this.path.getName());
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java
License:Apache License
/**
 * Fixes the size of hoplogs that were not closed properly last time.
 * Such hoplogs are *.tmphop files. Identify them, open them, and close
 * them; this fixes the size. After doing this, rename them to *.hop.
 *
 * @throws IOException
 * @throws ForceReattemptException
 */
void identifyAndFixTmpHoplogs(FileSystem fs) throws IOException, ForceReattemptException {
    if (logger.isDebugEnabled())
        logger.debug("{}Fixing temporary hoplogs", logPrefix);

    // A different file system is passed to this function for the following reason:
    // For HDFS, if a file wasn't closed properly last time, calling FileSystem.append
    // on that file goes through FSNamesystem.startFileInternal ->
    // FSNamesystem.recoverLeaseInternal, which throws AlreadyBeingCreatedException
    // if there is an open handle to any other file created using the same FileSystem
    // object. This is a bug and is being tracked at:
    // https://issues.apache.org/jira/browse/HDFS-3848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
    //
    // The fix for this bug is not yet part of Pivotal HD. To work around it,
    // we create a new file system for the timer task so that it does not hit the bug.
    FileStatus tmpHoplogs[] = FSUtils.listStatus(fs, fs.makeQualified(bucketPath), new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All valid hoplog files must match the regex
            Matcher matcher = patternForTmpHoplog.matcher(file.getName());
            return matcher.matches();
        }
    });

    if (tmpHoplogs == null || tmpHoplogs.length == 0) {
        if (logger.isDebugEnabled())
            logger.debug("{}No files to fix", logPrefix);
        return;
    }

    // Ping secondaries so that, in case of split brain, no other vm has taken
    // over as primary. #50110.
    pingSecondaries();

    if (logger.isDebugEnabled())
        logger.debug("{}Files to fix " + tmpHoplogs.length, logPrefix);

    String currentHoplogName = null;
    // Get the current hoplog name; we need to ignore the current hoplog while fixing.
    if (currentHoplog != null) {
        currentHoplogName = currentHoplog.getFileName();
    }

    for (int i = 0; i < tmpHoplogs.length; i++) {
        // Skip directories
        if (tmpHoplogs[i].isDirectory()) {
            continue;
        }

        final Path p = tmpHoplogs[i].getPath();

        if (tmpHoplogs[i].getPath().getName().equals(currentHoplogName)) {
            if (logger.isDebugEnabled())
                logger.debug("Skipping current file: " + tmpHoplogs[i].getPath().getName(), logPrefix);
            continue;
        }

        SequenceFileHoplog hoplog = new SequenceFileHoplog(fs, p, stats);
        try {
            makeLegitimate(hoplog);
            logger.info(LocalizedMessage.create(LocalizedStrings.DEBUG, "Hoplog " + p + " was a temporary "
                    + "hoplog because the node managing it wasn't shutdown properly last time. Fixed the hoplog name."));
        } catch (IOException e) {
            logger.info(LocalizedMessage.create(LocalizedStrings.DEBUG, "Hoplog " + p + " is still a temporary "
                    + "hoplog because the node managing it wasn't shutdown properly last time. Failed to "
                    + "change the hoplog name because an exception was thrown while fixing it. " + e));
        }
    }
}
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.github.sadikovi.riff.FileWriter.java
License:Open Source License
/**
 * Create file writer for path.
 * Configuration is passed separately and not reused from `fs.getConf`. This is to be explicit
 * about separate configuration from most of the hadoop settings. Actual user-facing API will
 * allow providing configuration for both file system and internal options.
 * @param fs file system to use
 * @param conf configuration
 * @param path path to the header file, also used to create data path
 * @param td type description for rows
 * @param codec compression codec
 * @throws IOException
 * @throws FileAlreadyExistsException
 */
FileWriter(FileSystem fs, Configuration conf, Path path, TypeDescription td, CompressionCodec codec)
        throws IOException {
    this.fs = fs;
    this.filePath = fs.makeQualified(path);
    this.writePrepared = false;
    this.writeFinished = false;
    if (this.fs.exists(filePath)) {
        throw new FileAlreadyExistsException("Already exists: " + filePath);
    }
    // this assumes that subsequent rows are provided for this schema
    this.td = td;
    this.numRowsInStripe = Riff.Options.numRowsInStripe(conf);
    this.bufferSize = Riff.Options.power2BufferSize(conf);
    this.hdfsBufferSize = Riff.Options.hdfsBufferSize(conf);
    this.columnFilterEnabled = Riff.Options.columnFilterEnabled(conf);
    this.codec = codec;
    // current stripe stats and filters
    this.stripeStats = null;
    this.stripeFilters = null;
    // file properties, by default not initialized
    this.fileProperties = null;
}
From source file:com.hazelcast.yarn.YarnUtil.java
License:Open Source License
public static LocalResource createFileResource(Path file, FileSystem fs, LocalResourceType type)
        throws Exception {
    LocalResource resource = Records.newRecord(LocalResource.class);
    file = fs.makeQualified(file);
    FileStatus stat = fs.getFileStatus(file);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
    resource.setSize(stat.getLen());
    resource.setTimestamp(stat.getModificationTime());
    resource.setType(type);
    resource.setVisibility(LocalResourceVisibility.APPLICATION);
    return resource;
}