Example usage for org.apache.hadoop.fs FileSystem getDefaultBlockSize

List of usage examples for org.apache.hadoop.fs FileSystem getDefaultBlockSize

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getDefaultBlockSize.

Prototype

@Deprecated
public long getDefaultBlockSize() 

Document

Returns the number of bytes into which large input files should optimally be split to minimize I/O time.
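
As a quick orientation before the collected examples, here is a minimal sketch of calling the method directly. It shows the deprecated no-argument form documented above alongside the path-based overload getDefaultBlockSize(Path), which is not deprecated; the path /user/data/input.txt is only a hypothetical example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DefaultBlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Deprecated no-argument form: default block size of this file system.
        @SuppressWarnings("deprecation")
        long blockSize = fs.getDefaultBlockSize();

        // Non-deprecated overload: default block size for the file system
        // that would hold the given (hypothetical) path.
        long pathBlockSize = fs.getDefaultBlockSize(new Path("/user/data/input.txt"));

        System.out.println("Default block size: " + blockSize + " bytes");
        System.out.println("Block size for path: " + pathBlockSize + " bytes");
    }
}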

Usage

From source file: edu.umn.cs.spatialHadoop.operations.Repartition.java

License: Open Source License

public static CellInfo[] packInRectangles(Path[] files, Path outFile, OperationsParams params,
        Rectangle fileMBR) throws IOException {
    final Vector<Point> sample = new Vector<Point>();

    float sample_ratio = params.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
    long sample_size = params.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

    LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
    ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
        @Override
        public void collect(Point value) {
            sample.add(value.clone());
        }
    };
    OperationsParams params2 = new OperationsParams(params);
    params2.setFloat("ratio", sample_ratio);
    params2.setLong("size", sample_size);
    params2.setClass("outshape", Point.class, TextSerializable.class);
    Sampler.sample(files, resultCollector, params2);
    LOG.info("Finished reading a sample of size: " + sample.size() + " records");

    long inFileSize = Sampler.sizeOfLastProcessedFile;

    // Compute an approximate MBR to determine the desired number of rows
    // and columns
    Rectangle approxMBR;
    if (fileMBR == null) {
        approxMBR = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
        for (Point pt : sample)
            approxMBR.expand(pt);
    } else {
        approxMBR = fileMBR;
    }
    GridInfo gridInfo = new GridInfo(approxMBR.x1, approxMBR.y1, approxMBR.x2, approxMBR.y2);
    FileSystem outFs = outFile.getFileSystem(params);
    @SuppressWarnings("deprecation")
    long blocksize = outFs.getDefaultBlockSize();
    gridInfo.calculateCellDimensions(Math.max(1, (int) ((inFileSize + blocksize / 2) / blocksize)));
    if (fileMBR == null)
        gridInfo.set(-Double.MAX_VALUE, -Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE);
    else
        gridInfo.set(fileMBR);

    Rectangle[] rectangles = RTree.packInRectangles(gridInfo, sample.toArray(new Point[sample.size()]));
    CellInfo[] cellsInfo = new CellInfo[rectangles.length];
    for (int i = 0; i < rectangles.length; i++)
        cellsInfo[i] = new CellInfo(i + 1, rectangles[i]);

    return cellsInfo;
}

From source file: edu.umn.cs.spatialHadoop.operations.Repartition.java

License: Open Source License

@SuppressWarnings("deprecation")
public static <S extends Shape> void repartitionLocal(Path inFile, Path outFile, OperationsParams params)
        throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    long blockSize = params.getSize("blocksize");

    FileSystem inFs = inFile.getFileSystem(new Configuration());
    FileSystem outFs = outFile.getFileSystem(new Configuration());

    // Calculate number of partitions in output file
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile);
        if (globalIndex != null) {
            // Copy blocksize from source file if it's globally indexed
            blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename))
                    .getBlockSize();
        } else {
            // Use default block size for output file system
            blockSize = outFs.getDefaultBlockSize();
        }
    }

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cells;
    if (sindex.equals("grid")) {
        Rectangle input_mbr = FileMBR.fileMBR(inFile, params);
        long inFileSize = FileMBR.sizeOfLastProcessedFile;
        int num_partitions = calculateNumberOfPartitions(new Configuration(), inFileSize, outFs, outFile,
                blockSize);

        GridInfo gridInfo = new GridInfo(input_mbr.x1, input_mbr.y1, input_mbr.x2, input_mbr.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cells = gridInfo.getAllCells();
    } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
            || sindex.equals("str+")) {
        cells = packInRectangles(inFile, outFile, params);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    repartitionLocal(inFile, outFile, cells, params);
}

From source file: edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java

License: Apache License

public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, OperationsParams params)
        throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");

    FileSystem outFs = outputPath.getFileSystem(params);

    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cellInfos;
    if (sindex.equals("grid")) {
        Rectangle inputMBR = FileMBR.fileMBR(inputPaths[0], params);
        long inputFileSize = FileMBR.sizeOfLastProcessedFile;
        for (int i = 1; i < inputPaths.length; i++) {
            Rectangle currentInputMBR = FileMBR.fileMBR(inputPaths[i], params);
            inputMBR.expand(currentInputMBR);
            inputFileSize = inputFileSize + FileMBR.sizeOfLastProcessedFile;
        }

        int num_partitions = calculateNumberOfPartitions(new Configuration(), inputFileSize, outFs, outputPath,
                blockSize);

        GridInfo gridInfo = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cellInfos = gridInfo.getAllCells();
    } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
            || sindex.equals("str+")) {
        // Pack in rectangles using an RTree
        cellInfos = packInRectangles(inputPaths, outputPath, params, null);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobConf job = new JobConf(params, RepartitionTemporal.class);
    job.setJobName("RepartitionTemporal");

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global
    // index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);

}

From source file: fuse4j.hadoopfs.HdfsClientImpl.java

License: Apache License

@Override
public FuseStatfs getStatus(int uid) {
    FileSystem dfs = null;
    try {
        dfs = getDfs(uid);
        FsStatus status = dfs.getStatus();
        long cap = status.getCapacity();
        long bsize = dfs.getDefaultBlockSize();
        long used = status.getUsed();

        FuseStatfs statFS = new FuseStatfs();
        statFS.blockSize = (int) bsize;
        statFS.blocks = (int) (cap / bsize);
        statFS.blocksFree = (int) ((cap - used) / bsize);
        statFS.blocksAvail = (int) ((cap - used) / bsize);
        statFS.files = 1000;
        statFS.filesFree = 500;
        statFS.namelen = 1023;
        return statFS;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

From source file: fuse4j.hadoopfs.HdfsClientImpl.java

License: Apache License

/**
 * getFileInfo()
 */
@Override
public HdfsFileAttr getFileInfo(int uid, String path) {
    FileSystem dfs = null;
    try {
        dfs = getDfs(uid);
        FileStatus dfsStat = dfs.getFileStatus(new Path(path));

        final boolean directory = dfsStat.isDir();
        final int inode = 0;
        final int mode = dfsStat.getPermission().toShort();
        final int uuid = userCache.getUid(dfsStat.getOwner());
        final int gid = 0;

        // TODO: per-file block-size can't be retrieved correctly,
        //       using default block size for now.
        final long size = dfsStat.getLen();
        final int blocks = (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

        // modification/create-times are the same as access-time
        final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);
        final int accessTime = (int) (dfsStat.getAccessTime() / 1000);

        HdfsFileAttr hdfsFileAttr = new HdfsFileAttr(directory, inode, mode, uuid, gid, 1);
        hdfsFileAttr.setSize(size, blocks);
        hdfsFileAttr.setTime(modificationTime, modificationTime, accessTime);

        // TODO Hack to set inode;
        hdfsFileAttr.inode = hdfsFileAttr.hashCode();

        return hdfsFileAttr;
    } catch (Exception ioe) {
        // fall through to failure
    }

    // failed
    return null;
}

From source file: hdfs.HdfsFileWriter.java

License: Apache License

public HdfsFileWriter(FileSystem fileSystem, Path path) throws IOException {
    LOG.debug("Creating writer on {}", path);
    this.path = path;

    Configuration conf = fileSystem.getConf();

    //    FsServerDefaults fsDefaults = fileSystem.getServerDefaults(path);
    //    EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.CREATE,
    //        CreateFlag.OVERWRITE);
    //    if (Boolean.getBoolean(HDFS_SYNC_BLOCK)) {
    //      flags.add(CreateFlag.SYNC_BLOCK);
    //    }
    //    fileSystem.getoutputStream = fileSystem.create(
    //          path,
    //          FsPermission.getDefault().applyUMask(FsPermission.getUMask(conf)),
    //          flags, 
    //          fsDefaults.getFileBufferSize(),
    //        fsDefaults.getReplication(),
    //        fsDefaults.getBlockSize(), null);
    //    fileSystem.

    this.outputStream = fileSystem.create(path, FsPermission.getDefault(), true, 50000,
            fileSystem.getDefaultReplication(), fileSystem.getDefaultBlockSize(), null);

}

From source file: org.apache.ignite.loadtests.igfs.IgfsPerformanceBenchmark.java

License: Apache License

/**
 * Tests stream write to specified file.
 *
 * @param file File to write to.
 * @param len Length to write.
 * @param bufSize Buffer size.
 * @param replication Replication factor.
 * @param progress Progress that will be incremented on each written chunk.
 */
private static void benchmarkWrite(FileSystem fs, Path file, long len, int bufSize, short replication,
        @Nullable AtomicLong progress) throws Exception {

    try (FSDataOutputStream out = fs.create(file, true, bufSize, replication, fs.getDefaultBlockSize())) {
        long written = 0;

        byte[] data = new byte[bufSize];

        while (written < len) {
            int chunk = (int) Math.min(len - written, bufSize);

            out.write(data, 0, chunk);

            written += chunk;

            if (progress != null)
                progress.addAndGet(chunk);
        }

        out.flush();
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
}

From source file: org.apache.impala.catalog.HdfsTable.java

License: Apache License

/**
 * Helper method to synthesize block metadata for file descriptor fd.
 */
private void synthesizeFdBlockMetadata(FileSystem fs, FileDescriptor fd, HdfsFileFormat fileFormat) {
    long start = 0;
    long remaining = fd.getFileLength();
    // Workaround HADOOP-11584 by using the filesystem default block size rather than
    // the block size from the FileStatus.
    // TODO: after HADOOP-11584 is resolved, get the block size from the FileStatus.
    long blockSize = fs.getDefaultBlockSize();
    if (blockSize < MIN_SYNTHETIC_BLOCK_SIZE)
        blockSize = MIN_SYNTHETIC_BLOCK_SIZE;
    if (!fileFormat.isSplittable(HdfsCompression.fromFileName(fd.getFileName()))) {
        blockSize = remaining;
    }
    while (remaining > 0) {
        long len = Math.min(remaining, blockSize);
        List<BlockReplica> replicas = Lists
                .newArrayList(new BlockReplica(hostIndex_.getIndex(REMOTE_NETWORK_ADDRESS), false));
        fd.addFileBlock(new FileBlock(start, len, replicas));
        remaining -= len;
        start += len;
    }
}

From source file: org.apache.pig.builtin.TrevniStorage.java

License: Apache License

@Override
public OutputFormat<NullWritable, Object> getOutputFormat() throws IOException {
    class TrevniStorageOutputFormat extends FileOutputFormat<NullWritable, Object> {

        private Schema schema;

        TrevniStorageOutputFormat(final Schema s) {
            schema = s;
            if (s == null) {
                String schemaString = getProperties(AvroStorage.class, udfContextSignature)
                        .getProperty(OUTPUT_AVRO_SCHEMA);
                if (schemaString != null) {
                    schema = (new Schema.Parser()).parse(schemaString);
                }
            }

        }

        @Override
        public RecordWriter<NullWritable, Object> getRecordWriter(final TaskAttemptContext tc)
                throws IOException, InterruptedException {

            if (schema == null) {
                String schemaString = getProperties(AvroStorage.class, udfContextSignature)
                        .getProperty(OUTPUT_AVRO_SCHEMA);
                if (schemaString != null) {
                    schema = (new Schema.Parser()).parse(schemaString);
                }
                if (schema == null) {
                    throw new IOException("Null output schema");
                }
            }

            final ColumnFileMetaData meta = new ColumnFileMetaData();

            for (Entry<String, String> e : tc.getConfiguration()) {
                if (e.getKey().startsWith(org.apache.trevni.avro.AvroTrevniOutputFormat.META_PREFIX)) {
                    meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                            e.getValue().getBytes(MetaData.UTF8));
                }
            }

            final Path dir = getOutputPath(tc);
            final FileSystem fs = FileSystem.get(tc.getConfiguration());
            final long blockSize = fs.getDefaultBlockSize();

            if (!fs.mkdirs(dir)) {
                throw new IOException("Failed to create directory: " + dir);
            }

            meta.setCodec("deflate");

            return new AvroRecordWriter(dir, tc.getConfiguration()) {
                private int part = 0;
                private Schema avroRecordWriterSchema;
                private AvroColumnWriter<GenericData.Record> writer;

                private void flush() throws IOException {
                    Integer taskAttemptId = tc.getTaskAttemptID().getTaskID().getId();
                    String partName = String.format("%05d_%03d", taskAttemptId, part++);
                    OutputStream out = fs
                            .create(new Path(dir, "part-" + partName + AvroTrevniOutputFormat.EXT));
                    try {
                        writer.writeTo(out);
                    } finally {
                        out.flush();
                        out.close();
                    }
                }

                @Override
                public void close(final TaskAttemptContext arg0) throws IOException, InterruptedException {
                    flush();
                }

                @Override
                public void write(final NullWritable n, final Object o)
                        throws IOException, InterruptedException {
                    GenericData.Record r = AvroStorageDataConversionUtilities.packIntoAvro((Tuple) o, schema);
                    writer.write(r);
                    if (writer.sizeEstimate() >= blockSize) {
                        flush();
                        writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
                    }
                }

                @Override
                public void prepareToWrite(Schema s) throws IOException {
                    avroRecordWriterSchema = s;
                    writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
                }
            };
        }
    }

    return new TrevniStorageOutputFormat(schema);
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSparkReducer.java

License: Apache License

@Override
@SuppressWarnings("deprecation")
public void call(Tuple2<Long, Iterable<Writable>> arg0) throws Exception {
    //prepare grouped partition input
    Long key = arg0._1();
    Iterator<Writable> valueList = arg0._2().iterator();

    //write entire partition to binary block sequence file
    SequenceFile.Writer writer = null;
    try {
        //create sequence file writer
        Configuration job = new Configuration(ConfigurationManager.getCachedJobConf());
        Path path = new Path(_fnameNew + File.separator + key);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
                job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), (short) _replication,
                fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());

        //write individual blocks unordered to output
        while (valueList.hasNext()) {
            PairWritableBlock pair = (PairWritableBlock) valueList.next();
            writer.append(pair.indexes, pair.block);
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}