Example usage for org.apache.hadoop.fs FileSystem makeQualified

List of usage examples for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path so that it uses this FileSystem and, if relative, is made absolute.
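
For illustration, here is a minimal, self-contained sketch (not taken from any of the projects below) of what qualification does: a relative path gains the file system's scheme and authority and is resolved against its working directory. The paths in the comments are only typical values and depend on fs.defaultFS and the current user.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // fs.defaultFS determines the scheme and authority used for qualification,
        // e.g. file:/// for the local file system or hdfs://namenode:8020 for HDFS.
        FileSystem fs = FileSystem.get(conf);

        // A relative path has no scheme or authority and is resolved against
        // the file system's working directory.
        Path relative = new Path("data/input");
        Path qualified = fs.makeQualified(relative);

        // Typically prints something like file:/home/<user>/data/input for the
        // local file system, or hdfs://namenode:8020/user/<user>/data/input for HDFS.
        System.out.println(qualified);
    }
}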

Usage

From source file:com.ebay.erl.mobius.core.JobSetup.java

License:Apache License

private static void ensureOutputDelete(Path outputFolder, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    outputFolder = fs.makeQualified(outputFolder);
    if (fs.exists(outputFolder)) {
        LOGGER.info("Deleting " + outputFolder.toString());
        fs.delete(outputFolder, true);
    }
}

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(
                    new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////

        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        float avgReduceSize = (float) samples.length / reducersNbr;

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // Try to distribute the load evenly across the reducers by
            // dividing <code>samples</code> into a set of blocks separated
            // by boundaries (objects selected from the <code>samples</code>
            // array); each block should be about the same size.

            // Find the last index of the element equal to samples[i], since
            // that element might appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);

            int lowerBound = i;//Util.findLowerBound(samples, samples[i], comparator);

            // The number of times samples[i] repeats; if the key by itself is
            // too big, select it as a boundary.
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // more than twice the average reducer size
            {
                // The current element is too big (more than twice
                // <code>avgReduceSize</code>), so select it as a
                // boundary by itself.
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                //pw.println(samples[i]);

                // Immediately put the next element on the boundary as well; the
                // next element starts at <code>upperBound + 1</code>. This
                // prevents the current one from consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());
                    //pw.println(samples[upperBound+1]);

                    // Move on to the element after <code>samples[upperBound + 1]</code>.
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // The current element is small enough to be considered
                // together with the previous group.
                int size = upperBound - lastBegin;
                if (size > avgReduceSize) {
                    // By including the current element, we have
                    // found a block that is big enough; select it
                    // as a boundary.
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                    //pw.println(samples[i]);

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // If the number of written samples does not equal the number of
        // reducers minus one, the key space is too small and
        // TotalOrderPartitioner won't work, since it requires distinct
        // partition boundaries.
        //
        // In that case we need to change the number of reducers.
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // Add 1 because the written samples define the boundaries. For
            // example, with two samples [300, 1000] there should be 3 reducers:
            // one handling i < 300, one handling 300 <= i < 1000, and another
            // handling 1000 <= i.
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}

From source file:com.explorys.apothecary.hbase.mr.inputformat.MergedStoreFileInputFormatTest.java

License:Apache License

protected HRegion createNewHRegion(HTableDescriptor desc, byte[] startKey, byte[] endKey) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = filesystem.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
    filesystem.mkdirs(rootdir);// w  w w  .  ja v  a  2  s  . c  o  m

    return HRegion.createHRegion(new HRegionInfo(desc, startKey, endKey), rootdir, conf);
}

From source file:com.facebook.LinkBench.LinkBenchDriverMR.java

License:Apache License

/**
 * Set up input files for the map-reduce job.
 * @param jobconf configuration of the map reduce job
 * @param nmappers number of mappers (loader or requester)
 */
private static FileSystem setupInputFiles(JobConf jobconf, int nmappers)
        throws IOException, InterruptedException {
    //setup input/output directories
    final Path indir = new Path(TMP_DIR, "in");
    final Path outdir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobconf, indir);
    FileOutputFormat.setOutputPath(jobconf, outdir);

    final FileSystem fs = FileSystem.get(jobconf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(indir)) {
        throw new IOException("Cannot create input directory " + indir);
    }

    //generate an input file for each map task
    if (USE_INPUT_FILES) {
        for (int i = 0; i < nmappers; ++i) {
            final Path file = new Path(indir, "part" + i);
            final IntWritable mapperid = new IntWritable(i);
            final IntWritable nummappers = new IntWritable(nmappers);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobconf, file, IntWritable.class,
                    IntWritable.class, CompressionType.NONE);
            try {
                writer.append(mapperid, nummappers);
            } finally {
                writer.close();
            }
            logger.info("Wrote input for Map #" + i);
        }
    }
    return fs;
}

From source file:com.floodCtr.Util.java

License:Open Source License

public static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);

    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());

    return resource;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplog.java

License:Apache License

private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
    this.conf = fs.getConf();
    this.stats = stats;
    this.path = fs.makeQualified(path);
    this.hfd = new HoplogDescriptor(this.path.getName());
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HDFSUnsortedHoplogOrganizer.java

License:Apache License

/**
 * Fixes the size of hoplogs that were not closed properly the last time
 * around. Such hoplogs are *.tmphop files: they are identified, opened,
 * and closed, which fixes their size, and are then renamed to *.hop.
 * @throws IOException
 * @throws ForceReattemptException 
 */
void identifyAndFixTmpHoplogs(FileSystem fs) throws IOException, ForceReattemptException {
    if (logger.isDebugEnabled())
        logger.debug("{}Fixing temporary hoplogs", logPrefix);

    // A different filesystem is passed to this function for the following reason: 
    // For HDFS, if a file wasn't closed properly last time, 
    // while calling FileSystem.append for this file, FSNamesystem.startFileInternal->
    // FSNamesystem.recoverLeaseInternal function gets called. 
    // This function throws AlreadyBeingCreatedException if there is an open handle, to any other file, 
    // created using the same FileSystem object. This is a bug and is being tracked at: 
    // https://issues.apache.org/jira/browse/HDFS-3848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
    // 
    // The fix for this bug is not yet part of Pivotal HD, so to work around it
    // we create a new file system for the timer task, which then does not hit the bug.

    FileStatus tmpHoplogs[] = FSUtils.listStatus(fs, fs.makeQualified(bucketPath), new PathFilter() {
        @Override
        public boolean accept(Path file) {
            // All valid hoplog files must match the regex
            Matcher matcher = patternForTmpHoplog.matcher(file.getName());
            return matcher.matches();
        }
    });

    if (tmpHoplogs == null || tmpHoplogs.length == 0) {
        if (logger.isDebugEnabled())
            logger.debug("{}No files to fix", logPrefix);
        return;
    }
    // Ping the secondaries so that, in case of a split brain, no other VM
    // has taken over as primary. #50110.
    pingSecondaries();
    if (logger.isDebugEnabled())
        logger.debug("{}Files to fix " + tmpHoplogs.length, logPrefix);

    String currentHoplogName = null;
    // Get the current hoplog name; we need to ignore the current hoplog while fixing.
    if (currentHoplog != null) {
        currentHoplogName = currentHoplog.getFileName();
    }

    for (int i = 0; i < tmpHoplogs.length; i++) {
        // Skip directories
        if (tmpHoplogs[i].isDirectory()) {
            continue;
        }

        final Path p = tmpHoplogs[i].getPath();

        if (tmpHoplogs[i].getPath().getName().equals(currentHoplogName)) {
            if (logger.isDebugEnabled())
                logger.debug("Skipping current file: " + tmpHoplogs[i].getPath().getName(), logPrefix);
            continue;
        }

        SequenceFileHoplog hoplog = new SequenceFileHoplog(fs, p, stats);
        try {
            makeLegitimate(hoplog);
            logger.info(LocalizedMessage.create(LocalizedStrings.DEBUG, "Hoplog " + p + " was a temporary "
                    + "hoplog because the node managing it wasn't shutdown properly last time. Fixed the hoplog name."));
        } catch (IOException e) {
            logger.info(LocalizedMessage.create(LocalizedStrings.DEBUG, "Hoplog " + p + " is still a temporary "
                    + "hoplog because the node managing it wasn't shutdown properly last time. Failed to "
                    + "change the hoplog name because an exception was thrown while fixing it. " + e));
        }
    }
}
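
The long comment in identifyAndFixTmpHoplogs above explains why the method is handed its own FileSystem object: a shared instance can trigger AlreadyBeingCreatedException (HDFS-3848) when append() recovers an improperly closed file. As a rough sketch of how such a non-shared instance can be obtained, the following uses FileSystem.newInstance, which bypasses Hadoop's FileSystem cache; the class and path names are made up for illustration and this is not necessarily how the GemFire code actually wires it up.

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeparateFileSystemSketch {

    /**
     * Returns a FileSystem instance that is not shared with any other caller.
     * FileSystem.get(conf) hands out a cached, shared instance, whereas
     * newInstance(...) always creates a fresh one, so open handles held
     * elsewhere cannot interfere with lease recovery on append.
     */
    static FileSystem separateFileSystemFor(Path path, Configuration conf) throws IOException {
        URI uri = path.toUri();
        return FileSystem.newInstance(uri, conf);
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path bucketPath = new Path("/store/bucket-0"); // hypothetical bucket path
        try (FileSystem fs = separateFileSystemFor(bucketPath, conf)) {
            System.out.println("Qualified bucket path: " + fs.makeQualified(bucketPath));
        }
    }
}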

From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:com.github.sadikovi.riff.FileWriter.java

License:Open Source License

/**
 * Create a file writer for the path.
 * Configuration is passed separately and not reused from `fs.getConf`; this keeps the writer's
 * configuration explicitly separate from most of the Hadoop settings. The actual user-facing API
 * will allow providing configuration for both the file system and the internal options.
 * @param fs file system to use
 * @param conf configuration
 * @param path path to the header file, also used to create data path
 * @param td type description for rows
 * @param codec compression codec
 * @throws IOException
 * @throws FileAlreadyExistsException
 */
FileWriter(FileSystem fs, Configuration conf, Path path, TypeDescription td, CompressionCodec codec)
        throws IOException {
    this.fs = fs;
    this.filePath = fs.makeQualified(path);
    this.writePrepared = false;
    this.writeFinished = false;
    if (this.fs.exists(filePath)) {
        throw new FileAlreadyExistsException("Already exists: " + filePath);
    }
    // this assumes that subsequent rows are provided for this schema
    this.td = td;
    this.numRowsInStripe = Riff.Options.numRowsInStripe(conf);
    this.bufferSize = Riff.Options.power2BufferSize(conf);
    this.hdfsBufferSize = Riff.Options.hdfsBufferSize(conf);
    this.columnFilterEnabled = Riff.Options.columnFilterEnabled(conf);
    this.codec = codec;
    // current stripe stats and filters
    this.stripeStats = null;
    this.stripeFilters = null;
    // file properties, by default not initialized
    this.fileProperties = null;
}

From source file:com.hazelcast.yarn.YarnUtil.java

License:Open Source License

public static LocalResource createFileResource(Path file, FileSystem fs, LocalResourceType type)
        throws Exception {
    LocalResource resource = Records.newRecord(LocalResource.class);

    file = fs.makeQualified(file);
    FileStatus stat = fs.getFileStatus(file);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
    resource.setSize(stat.getLen());
    resource.setTimestamp(stat.getModificationTime());
    resource.setType(type);
    resource.setVisibility(LocalResourceVisibility.APPLICATION);
    return resource;
}