Example usage for org.apache.hadoop.fs FileStatus FileStatus

List of usage examples for org.apache.hadoop.fs FileStatus FileStatus

Introduction

On this page you can find example usages of the no-argument constructor of org.apache.hadoop.fs.FileStatus.

Prototype

public FileStatus() 

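The no-argument constructor creates an empty, mutable FileStatus. Because FileStatus implements Writable (the SimpleCopyListing example below calls write() and readFields() on it directly), the empty instance is typically used as a reusable value object that SequenceFile.Reader.next(key, value) fills in on each call, which is the pattern in every example below. Here is a minimal sketch of that pattern; the listing path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class FileStatusReadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical SequenceFile of <Text, FileStatus> records.
        Path listing = new Path("/tmp/fileList.seq");
        FileSystem fs = listing.getFileSystem(conf);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, listing, conf);
        try {
            Text relPath = new Text();
            FileStatus status = new FileStatus(); // empty instance, populated by each next() call
            while (reader.next(relPath, status)) {
                System.out.println(relPath + " -> " + status.getLen() + " bytes");
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}

Reusing one FileStatus across iterations avoids allocating a new object per record, which is why the DistCp examples below declare it once, outside the read loop.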
Usage

From source file:com.hdfs.concat.crush.BucketerTest.java

License:Apache License

@Test(expected = IllegalStateException.class)
public void callAddBeforeReset() {
    bucketer.add(new FileStatusHasSize(new FileStatus()));
}

From source file:com.inmobi.conduit.distcp.tools.CopyListing.java

License:Apache License

/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing built by doBuildListing
 * @throws IOException - if there are any issues while checking for duplicates
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {

    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);

    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
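        // The listing was just sorted, so any duplicate source-relative paths
        // sit on consecutive records; one linear scan over the keys finds them.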
        Text lastKey = new Text("*"); //source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();

        while (sourceReader.next(srcRelPath, srcFileStatus)) {
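            // Attributes on regular files are preserved by the map tasks at
            // copy time, so only directory entries need to be handled here.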
            if (!srcFileStatus.isDir())
                continue;

            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

            //Skip the root folder; preserve its status only after the atomic
            //commit is complete, since changing it earlier could fail the commit
            if (targetRoot.equals(targetFile))
                continue;

            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
            preservedEntries++;

            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
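        // Merge-join over the two sorted listings: advance the source cursor
        // past smaller keys; any target entry with no matching source entry
        // is missing at the source and gets deleted.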
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputFormat.java

License:Apache License

private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle(JobContext context) throws IOException {

    final Configuration configuration = HadoopCompat.getConfiguration(context);
    int numRecords = getNumberOfRecords(configuration);
    int numMaps = getNumMapTasks(configuration);
    // Number of chunks each map will process, on average.
    int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
    validateNumChunksUsing(splitRatio, numMaps);

    int numEntriesPerChunk = (int) Math.ceil((float) numRecords / (splitRatio * numMaps));
    DistCpUtils.publish(HadoopCompat.getConfiguration(context), CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
            numEntriesPerChunk);

    final int nChunksTotal = (int) Math.ceil((float) numRecords / numEntriesPerChunk);
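    // Cap how many chunk files are written to at once, so a large listing
    // does not hold an unbounded number of output streams open.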
    int nChunksOpenAtOnce = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

    Path listingPath = getListingFilePath(configuration);
    SequenceFile.Reader reader = new SequenceFile.Reader(listingPath.getFileSystem(configuration), listingPath,
            configuration);

    List<DynamicInputChunk> openChunks = new ArrayList<DynamicInputChunk>();

    List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

    FileStatus fileStatus = new FileStatus();
    Text relPath = new Text();
    int recordCounter = 0;
    int chunkCount = 0;
    DynamicInputChunkSet chunkSet = new DynamicInputChunkSet(configuration);

    try {

        while (reader.next(relPath, fileStatus)) {
            if (recordCounter % (nChunksOpenAtOnce * numEntriesPerChunk) == 0) {
                // All chunks full. Create new chunk-set.
                closeAll(openChunks);
                chunksFinal.addAll(openChunks);

                openChunks = createChunks(chunkSet, chunkCount, nChunksTotal, nChunksOpenAtOnce);

                chunkCount += openChunks.size();

                nChunksOpenAtOnce = openChunks.size();
                recordCounter = 0;
            }

            // Shuffle into open chunks.
            openChunks.get(recordCounter % nChunksOpenAtOnce).write(relPath, fileStatus);
            ++recordCounter;
        }

    } finally {
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);
        IOUtils.closeStream(reader);
    }

    LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size());
    return chunksFinal;
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java

License:Apache License

private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
    long lastEnd = 0;

    //Verify that each split's start matches the previous split's end,
    //so no part of the input file is missed
    for (InputSplit split : splits) {
        FileSplit fileSplit = (FileSplit) split;
        long start = fileSplit.getStart();
        Assert.assertEquals(lastEnd, start);
        lastEnd = start + fileSplit.getLength();
    }

    //Verify there is nothing more to read from the input file
    SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(), listFile,
            cluster.getFileSystem().getConf());
    try {
        reader.seek(lastEnd);
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
    } finally {
        IOUtils.closeStream(reader);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.UniformSizeInputFormat.java

License:Apache License

private List<InputSplit> getSplits(Configuration configuration, int numSplits, long totalSizeBytes)
        throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
    long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);

    FileStatus srcFileStatus = new FileStatus();
    Text srcRelPath = new Text();
    long currentSplitSize = 0;
    long lastSplitStart = 0;
    long lastPosition = 0;

    final Path listingFilePath = getListingFilePath(configuration);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Average bytes per map: " + nBytesPerSplit + ", Number of maps: " + numSplits
                + ", total size: " + totalSizeBytes);
    }
    SequenceFile.Reader reader = null;
    try {
        reader = getListingFileReader(configuration);
        while (reader.next(srcRelPath, srcFileStatus)) {
            // If adding the current file would push this split past the
            // per-map byte limit, close the split and start a new one
            if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
                FileSplit split = new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart,
                        null);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
                }
                splits.add(split);
                lastSplitStart = lastPosition;
                currentSplitSize = 0;
            }
            currentSplitSize += srcFileStatus.getLen();
            lastPosition = reader.getPosition();
        }
        if (lastPosition > lastSplitStart) {
            FileSplit split = new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart,
                    null);
            if (LOG.isDebugEnabled()) {
                LOG.info("Creating split : " + split + ", bytes in split: " + currentSplitSize);
            }
            splits.add(split);
        }

    } finally {
        IOUtils.closeStream(reader);
    }

    return splits;
}

From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java

License:Apache License

private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
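    // Deep-copies a FileStatus by round-tripping it through its Writable
    // serialization; 'buffer' and 'in' are reusable streams owned by the
    // enclosing class.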
    FileStatus status = new FileStatus();

    buffer.reset();
    DataOutputStream out = new DataOutputStream(buffer);
    fileStatus.write(out);

    in.reset(buffer.toByteArray(), 0, buffer.size());
    status.readFields(in);
    return status;
}

From source file:com.inmobi.conduit.distcp.tools.TestCopyListing.java

License:Apache License

@Test
public void testBuildListingForSingleFile() {
    FileSystem fs = null;
    String testRootString = "/singleFileListing";
    Path testRoot = new Path(testRootString);
    SequenceFile.Reader reader = null;
    try {
        fs = FileSystem.get(getConf());
        if (fs.exists(testRoot))
            TestDistCpUtils.delete(fs, testRootString);

        Path sourceFile = new Path(testRoot, "/source/foo/bar/source.txt");
        Path decoyFile = new Path(testRoot, "/target/moo/source.txt");
        Path targetFile = new Path(testRoot, "/target/moo/target.txt");

        TestDistCpUtils.createFile(fs, sourceFile.toString());
        TestDistCpUtils.createFile(fs, decoyFile.toString());
        TestDistCpUtils.createFile(fs, targetFile.toString());

        List<Path> srcPaths = new ArrayList<Path>();
        srcPaths.add(sourceFile);

        DistCpOptions options = new DistCpOptions(srcPaths, targetFile);
        CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);

        final Path listFile = new Path(testRoot, "/tmp/fileList.seq");
        listing.buildListing(listFile, options);

        reader = new SequenceFile.Reader(fs, listFile, getConf());
        FileStatus fileStatus = new FileStatus();
        Text relativePath = new Text();
        Assert.assertTrue(reader.next(relativePath, fileStatus));
        Assert.assertTrue(relativePath.toString().equals(""));
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.fail("Unexpected exception encountered.");
    } finally {
        TestDistCpUtils.delete(fs, testRootString);
        IOUtils.closeStream(reader);
    }
}

From source file:com.inmobi.conduit.distcp.tools.TestFileBasedCopyListing.java

License:Apache License

private void checkResult(Path listFile, int count) throws IOException {
    if (count == 0) {
        return;
    }

    int recCount = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, listFile, config);
    try {
        Text relPath = new Text();
        FileStatus fileStatus = new FileStatus();
        while (reader.next(relPath, fileStatus)) {
            Assert.assertEquals(fileStatus.getPath().toUri().getPath(), map.get(relPath.toString()));
            recCount++;
        }
    } finally {
        IOUtils.closeStream(reader);
    }
    Assert.assertEquals(count, recCount);
}