List of usage examples for org.apache.hadoop.fs.FileStatus.getBlockSize()
public long getBlockSize()
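Before the longer examples below, a minimal, self-contained sketch of the call itself. The file path is a hypothetical placeholder, and the default FileSystem comes from whatever configuration is on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // "/tmp/example.txt" is a placeholder; any existing file works
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        // Block size of the file in bytes (e.g. 134217728 on a stock HDFS
        // with a 128 MB default)
        System.out.println("Block size: " + status.getBlockSize());
    }
}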
From source file:eagle.security.hdfs.entity.FileStatusEntity.java
License:Apache License
public FileStatusEntity(FileStatus status) throws IOException {
    //this.path = status.getPath();
    this.length = status.getLen();
    this.isdir = status.isDirectory();
    this.block_replication = status.getReplication();
    this.blocksize = status.getBlockSize();
    this.modification_time = status.getModificationTime();
    this.access_time = status.getAccessTime();
    this.permission = status.getPermission();
    this.owner = status.getOwner();
    this.group = status.getGroup();
    if (status.isSymlink()) {
        this.symlink = status.getSymlink();
    }
}
From source file:edu.ucsb.cs.hadoop.CustomFileInputFormat.java
License:Apache License
/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too big.
 */
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(goalSize, minSize, blockSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize,
                        clusterMap);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize, splitHosts));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
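For context: the computeSplitSize helper used above is what turns getBlockSize() into an actual split size. In the old mapred API it clamps the per-file block size between the goal size (total input size divided by the requested number of splits) and the configured minimum. A minimal sketch matching the stock Hadoop 1.x implementation:

// Sketch of org.apache.hadoop.mapred.FileInputFormat.computeSplitSize:
// the block size acts as the default, capped above by goalSize and
// raised from below by minSize.
protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
}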
From source file:edu.umn.cs.spatialHadoop.mapred.CombinedSpatialInputFormat.java
License:Apache License
public void splitFile(JobConf job, Path path, List<FileSplit> splits) throws IOException {
    NetworkTopology clusterMap = new NetworkTopology();
    FileSystem fs = path.getFileSystem(job);
    FileStatus file = fs.getFileStatus(path);
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if (length != 0) {
        long blockSize = file.getBlockSize();
        long splitSize = blockSize;

        long bytesRemaining = length;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
            String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize, clusterMap);
            splits.add(new FileSplit(path, length - bytesRemaining, splitSize, splitHosts));
            bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
            splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
    } else if (length != 0) {
        // NOTE: this branch is unreachable; length != 0 is fully handled above.
        // It appears to be left over from the FileInputFormat.getSplits template,
        // where the first condition also tests isSplitable().
        String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
        splits.add(new FileSplit(path, 0, length, splitHosts));
    } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Plot.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Rectangle.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.x1, fileMbr.y1, fileMbr.x2, fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java
License:Open Source License
private static int sampleWithRatio(Path[] files, final ResultCollector<? extends TextSerializable> output,
        OperationsParams params) throws IOException {
    FileSystem fs = files[0].getFileSystem(params);
    FileStatus inFStatus = fs.getFileStatus(files[0]);
    if (inFStatus.isDir() || inFStatus.getLen() / inFStatus.getBlockSize() > 1) {
        // Either a directory of files or a single file larger than one block
        return sampleMapReduceWithRatio(files, output, params);
    } else {
        // A single small file, process it without MapReduce
        return sampleLocalWithRatio(files, output, params);
    }
}
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Lists the contents of a directory
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");
        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
/**
 * Write the contents of input stream into staging path.
 *
 * <p>
 * WriteAt indicates the path where the contents of the input stream should be written. When this method is
 * called, the path writeAt.getParent() will exist already, but the path writeAt will not exist. When this
 * method returns, the path writeAt must exist. Any data written to any location other than writeAt or a
 * descendant of writeAt will be ignored.
 * </p>
 *
 * @param inputStream {@link FSDataInputStream} whose contents should be written to staging path.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @throws IOException
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {

    final short replication = copyableFile.getPreserve().preserve(PreserveAttributes.Option.REPLICATION)
            ? copyableFile.getOrigin().getReplication()
            : this.fs.getDefaultReplication(writeAt);
    final long blockSize = copyableFile.getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE)
            ? copyableFile.getOrigin().getBlockSize()
            : this.fs.getDefaultBlockSize(writeAt);

    // Only recover a persisted file whose replication and block size match what we intend to write
    Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return input.getReplication() == replication && input.getBlockSize() == blockSize;
        }
    };
    Optional<FileStatus> persistedFile = this.recoveryHelper.findPersistedFile(this.state, copyableFile,
            fileStatusAttributesFilter);

    if (persistedFile.isPresent()) {
        log.info(String.format("Recovering persisted file %s to %s.", persistedFile.get().getPath(), writeAt));
        this.fs.rename(persistedFile.get().getPath(), writeAt);
    } else {
        OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096),
                replication, blockSize);
        if (encryptionConfig != null) {
            os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
        }
        try {
            StreamThrottler<GobblinScopeTypes> throttler = this.taskBroker
                    .getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
            ThrottledInputStream throttledInputStream = throttler.throttleInputStream().inputStream(inputStream)
                    .sourceURI(copyableFile.getOrigin().getPath().toUri())
                    .targetURI(this.fs.makeQualified(writeAt).toUri()).build();
            StreamCopier copier = new StreamCopier(throttledInputStream, os).withBufferSize(this.bufferSize);

            if (isInstrumentationEnabled()) {
                copier.withCopySpeedMeter(this.copySpeedMeter);
            }

            this.bytesWritten.addAndGet(copier.copy());
            if (isInstrumentationEnabled()) {
                log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
                        this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
            } else {
                log.info("File {} copied.", copyableFile.getOrigin().getPath());
            }
        } catch (NotConfiguredException nce) {
            log.warn("Broker error. Some features of stream copier may not be available.", nce);
        } finally {
            os.close();
            inputStream.close();
        }
    }
}
From source file:gobblin.util.filesystem.InstrumentedFileSystemUtils.java
License:Apache License
/**
 * Replace the scheme of the input {@link FileStatus} if it matches the string to replace.
 */
public static FileStatus replaceScheme(FileStatus st, String replace, String replacement) {
    if (replace != null && replace.equals(replacement)) {
        return st;
    }
    try {
        return new FileStatus(st.getLen(), st.isDir(), st.getReplication(), st.getBlockSize(),
                st.getModificationTime(), st.getAccessTime(), st.getPermission(), st.getOwner(), st.getGroup(),
                st.isSymlink() ? st.getSymlink() : null, replaceScheme(st.getPath(), replace, replacement));
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
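A hypothetical call for illustration; the scheme names here are placeholders, not constants from the library:

// Rewrite an hdfs:// status to point at a wrapped/instrumented scheme
FileStatus rewritten = InstrumentedFileSystemUtils.replaceScheme(status, "hdfs", "instrumented-hdfs");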
From source file:gr.ntua.h2rdf.inputFormat.MyFileInputFormat.java
License:Open Source License
/**
 * Generate the list of files and make them into FileSplits.
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new MyFileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new MyFileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new MyFileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new MyFileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());

    String p = job.getConfiguration().get("mapred.fairscheduler.pool");
    int max = Integer.parseInt(p.substring(p.indexOf("l") + 1));
    if (splits.size() <= max)
        job.getConfiguration().setInt("mapred.reduce.tasks", splits.size());
    else
        job.getConfiguration().setInt("mapred.reduce.tasks", max);
    return splits;
}
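This example uses the newer org.apache.hadoop.mapreduce API, where split sizing is driven by configurable minimum and maximum sizes rather than a goal size derived from numSplits. A sketch of the corresponding stock helper, for comparison with the mapred-API version shown earlier:

// Sketch of org.apache.hadoop.mapreduce.lib.input.FileInputFormat.computeSplitSize:
// minSize/maxSize come from the split.minsize and split.maxsize settings,
// so getBlockSize() only determines the split size when it falls between the two.
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}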
From source file:hadoop.example.hdfs.ShowFileStatusTestCase.java
License:Open Source License
@Test
public void fileStatusForFile() throws IOException {
    Path file = new Path("/dir/file");
    FileStatus stat = fs.getFileStatus(file);
    Assert.assertEquals("/dir/file", stat.getPath().toUri().getPath());
    Assert.assertFalse(stat.isDir());
    Assert.assertEquals(stat.getLen(), 7L);
    Assert.assertEquals(stat.getReplication(), 1);
    Assert.assertEquals(stat.getBlockSize(), 64 * 1024 * 1024L);
    Assert.assertEquals(stat.getOwner(), "haint");
    Assert.assertEquals(stat.getGroup(), "supergroup");
    Assert.assertEquals(stat.getPermission().toString(), "rw-r--r--");
}
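Note that the expected 64 MB block size matches the classic HDFS default (dfs.block.size in Hadoop 1.x); Hadoop 2.x and later default dfs.blocksize to 128 MB, so this assertion only holds against a cluster, or MiniDFSCluster, configured with the older default.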