List of usage examples for org.apache.hadoop.fs.FileSystem getDefaultBlockSize
@Deprecated public long getDefaultBlockSize()
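The no-argument form shown above is deprecated in favor of the path-aware overload getDefaultBlockSize(Path), which lets the filesystem resolve the default block size for a specific path. A minimal sketch of the two calls side by side (not taken from the examples below; the class name and command-line argument are illustrative):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DefaultBlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path(args[0]); // hypothetical input path supplied on the command line
        FileSystem fs = file.getFileSystem(conf);

        @SuppressWarnings("deprecation")
        long legacyDefault = fs.getDefaultBlockSize();    // deprecated, filesystem-wide default
        long pathDefault = fs.getDefaultBlockSize(file);  // preferred, resolved for this path

        System.out.println("default block size: " + legacyDefault + " vs " + pathDefault);
    }
}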
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
public static CellInfo[] packInRectangles(Path[] files, Path outFile, OperationsParams params,
        Rectangle fileMBR) throws IOException {
    final Vector<Point> sample = new Vector<Point>();

    float sample_ratio = params.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
    long sample_size = params.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);

    LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
    ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
        @Override
        public void collect(Point value) {
            sample.add(value.clone());
        }
    };
    OperationsParams params2 = new OperationsParams(params);
    params2.setFloat("ratio", sample_ratio);
    params2.setLong("size", sample_size);
    params2.setClass("outshape", Point.class, TextSerializable.class);
    Sampler.sample(files, resultCollector, params2);
    LOG.info("Finished reading a sample of size: " + sample.size() + " records");
    long inFileSize = Sampler.sizeOfLastProcessedFile;

    // Compute an approximate MBR to determine the desired number of rows and columns
    Rectangle approxMBR;
    if (fileMBR == null) {
        approxMBR = new Rectangle(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
        for (Point pt : sample)
            approxMBR.expand(pt);
    } else {
        approxMBR = fileMBR;
    }
    GridInfo gridInfo = new GridInfo(approxMBR.x1, approxMBR.y1, approxMBR.x2, approxMBR.y2);
    FileSystem outFs = outFile.getFileSystem(params);
    @SuppressWarnings("deprecation")
    long blocksize = outFs.getDefaultBlockSize();
    gridInfo.calculateCellDimensions(Math.max(1, (int) ((inFileSize + blocksize / 2) / blocksize)));
    if (fileMBR == null)
        gridInfo.set(-Double.MAX_VALUE, -Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE);
    else
        gridInfo.set(fileMBR);
    Rectangle[] rectangles = RTree.packInRectangles(gridInfo, sample.toArray(new Point[sample.size()]));
    CellInfo[] cellsInfo = new CellInfo[rectangles.length];
    for (int i = 0; i < rectangles.length; i++)
        cellsInfo[i] = new CellInfo(i + 1, rectangles[i]);
    return cellsInfo;
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
@SuppressWarnings("deprecation") public static <S extends Shape> void repartitionLocal(Path inFile, Path outFile, OperationsParams params) throws IOException, InterruptedException { String sindex = params.get("sindex"); long blockSize = params.getSize("blocksize"); FileSystem inFs = inFile.getFileSystem(new Configuration()); FileSystem outFs = outFile.getFileSystem(new Configuration()); // Calculate number of partitions in output file if (blockSize == 0) { GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inFile); if (globalIndex != null) { // Copy blocksize from source file if it's globally indexed blockSize = inFs.getFileStatus(new Path(inFile, globalIndex.iterator().next().filename)) .getBlockSize();/*from w ww. j a v a2s . c o m*/ } else { // Use default block size for output file system blockSize = outFs.getDefaultBlockSize(); } } // Calculate the dimensions of each partition based on gindex type CellInfo[] cells; if (sindex.equals("grid")) { Rectangle input_mbr = FileMBR.fileMBR(inFile, params); long inFileSize = FileMBR.sizeOfLastProcessedFile; int num_partitions = calculateNumberOfPartitions(new Configuration(), inFileSize, outFs, outFile, blockSize); GridInfo gridInfo = new GridInfo(input_mbr.x1, input_mbr.y1, input_mbr.x2, input_mbr.y2); gridInfo.calculateCellDimensions(num_partitions); cells = gridInfo.getAllCells(); } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str") || sindex.equals("str+")) { cells = packInRectangles(inFile, outFile, params); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } repartitionLocal(inFile, outFile, cells, params); }
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, OperationsParams params)
        throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");

    FileSystem outFs = outputPath.getFileSystem(params);
    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cellInfos;
    if (sindex.equals("grid")) {
        Rectangle inputMBR = FileMBR.fileMBR(inputPaths[0], params);
        long inputFileSize = FileMBR.sizeOfLastProcessedFile;
        for (int i = 1; i < inputPaths.length; i++) {
            Rectangle currentInputMBR = FileMBR.fileMBR(inputPaths[i], params);
            inputMBR.expand(currentInputMBR);
            inputFileSize = inputFileSize + FileMBR.sizeOfLastProcessedFile;
        }

        int num_partitions = calculateNumberOfPartitions(new Configuration(), inputFileSize, outFs,
                outputPath, blockSize);
        GridInfo gridInfo = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cellInfos = gridInfo.getAllCells();
    } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
            || sindex.equals("str+")) {
        // Pack in rectangles using an RTree
        cellInfos = packInRectangles(inputPaths, outputPath, params, null);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobConf job = new JobConf(params, RepartitionTemporal.class);
    job.setJobName("RepartitionTemporal");

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());

    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}
From source file:fuse4j.hadoopfs.HdfsClientImpl.java
License:Apache License
@Override
public FuseStatfs getStatus(int uid) {
    FileSystem dfs = null;
    try {
        dfs = getDfs(uid);
        FsStatus status = dfs.getStatus();
        long cap = status.getCapacity();
        long bsize = dfs.getDefaultBlockSize();
        long used = status.getUsed();

        FuseStatfs statFS = new FuseStatfs();
        statFS.blockSize = (int) bsize;
        statFS.blocks = (int) (cap / bsize);
        statFS.blocksFree = (int) ((cap - used) / bsize);
        statFS.blocksAvail = (int) ((cap - used) / bsize);
        statFS.files = 1000;
        statFS.filesFree = 500;
        statFS.namelen = 1023;
        return statFS;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
From source file:fuse4j.hadoopfs.HdfsClientImpl.java
License:Apache License
/**
 * getFileInfo()
 */
@Override
public HdfsFileAttr getFileInfo(int uid, String path) {
    FileSystem dfs = null;
    try {
        dfs = getDfs(uid);
        FileStatus dfsStat = dfs.getFileStatus(new Path(path));
        final boolean directory = dfsStat.isDir();
        final int inode = 0;
        final int mode = dfsStat.getPermission().toShort();
        final int uuid = userCache.getUid(dfsStat.getOwner());
        final int gid = 0;

        // TODO: per-file block-size can't be retrieved correctly,
        // using default block size for now.
        final long size = dfsStat.getLen();
        final int blocks = (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

        // modification/create-times are the same as access-time
        final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);
        final int accessTime = (int) (dfsStat.getAccessTime() / 1000);

        HdfsFileAttr hdfsFileAttr = new HdfsFileAttr(directory, inode, mode, uuid, gid, 1);
        hdfsFileAttr.setSize(size, blocks);
        hdfsFileAttr.setTime(modificationTime, modificationTime, accessTime);

        // TODO Hack to set inode;
        hdfsFileAttr.inode = hdfsFileAttr.hashCode();
        return hdfsFileAttr;
    } catch (Exception ioe) {
        // fall through to failure
    }

    // failed
    return null;
}
From source file:hdfs.HdfsFileWriter.java
License:Apache License
public HdfsFileWriter(FileSystem fileSystem, Path path) throws IOException {
    LOG.debug("Creating writer on {}", path);
    this.path = path;

    Configuration conf = fileSystem.getConf();
    // FsServerDefaults fsDefaults = fileSystem.getServerDefaults(path);
    //
    // EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE);
    // if (Boolean.getBoolean(HDFS_SYNC_BLOCK)) {
    //     flags.add(CreateFlag.SYNC_BLOCK);
    // }
    // outputStream = fileSystem.create(
    //     path,
    //     FsPermission.getDefault().applyUMask(FsPermission.getUMask(conf)),
    //     flags,
    //     fsDefaults.getFileBufferSize(),
    //     fsDefaults.getReplication(),
    //     fsDefaults.getBlockSize(), null);
    this.outputStream = fileSystem.create(path, FsPermission.getDefault(), true, 50000,
            fileSystem.getDefaultReplication(), fileSystem.getDefaultBlockSize(), null);
}
From source file:org.apache.ignite.loadtests.igfs.IgfsPerformanceBenchmark.java
License:Apache License
/**
 * Tests stream write to specified file.
 *
 * @param file File to write to.
 * @param len Length to write.
 * @param bufSize Buffer size.
 * @param replication Replication factor.
 * @param progress Progress that will be incremented on each written chunk.
 */
private static void benchmarkWrite(FileSystem fs, Path file, long len, int bufSize, short replication,
        @Nullable AtomicLong progress) throws Exception {
    try (FSDataOutputStream out = fs.create(file, true, bufSize, replication, fs.getDefaultBlockSize())) {
        long written = 0;

        byte[] data = new byte[bufSize];

        while (written < len) {
            int chunk = (int) Math.min(len - written, bufSize);

            out.write(data, 0, chunk);

            written += chunk;

            if (progress != null)
                progress.addAndGet(chunk);
        }

        out.flush();
    } catch (Exception e) {
        e.printStackTrace();

        throw e;
    }
}
From source file:org.apache.impala.catalog.HdfsTable.java
License:Apache License
/**
 * Helper method to synthesize block metadata for file descriptor fd.
 */
private void synthesizeFdBlockMetadata(FileSystem fs, FileDescriptor fd, HdfsFileFormat fileFormat) {
    long start = 0;
    long remaining = fd.getFileLength();
    // Workaround HADOOP-11584 by using the filesystem default block size rather than
    // the block size from the FileStatus.
    // TODO: after HADOOP-11584 is resolved, get the block size from the FileStatus.
    long blockSize = fs.getDefaultBlockSize();
    if (blockSize < MIN_SYNTHETIC_BLOCK_SIZE)
        blockSize = MIN_SYNTHETIC_BLOCK_SIZE;
    if (!fileFormat.isSplittable(HdfsCompression.fromFileName(fd.getFileName()))) {
        blockSize = remaining;
    }
    while (remaining > 0) {
        long len = Math.min(remaining, blockSize);
        List<BlockReplica> replicas = Lists
                .newArrayList(new BlockReplica(hostIndex_.getIndex(REMOTE_NETWORK_ADDRESS), false));
        fd.addFileBlock(new FileBlock(start, len, replicas));
        remaining -= len;
        start += len;
    }
}
From source file:org.apache.pig.builtin.TrevniStorage.java
License:Apache License
@Override
public OutputFormat<NullWritable, Object> getOutputFormat() throws IOException {

    class TrevniStorageOutputFormat extends FileOutputFormat<NullWritable, Object> {

        private Schema schema;

        TrevniStorageOutputFormat(final Schema s) {
            schema = s;
            if (s == null) {
                String schemaString = getProperties(AvroStorage.class, udfContextSignature)
                        .getProperty(OUTPUT_AVRO_SCHEMA);
                if (schemaString != null) {
                    schema = (new Schema.Parser()).parse(schemaString);
                }
            }
        }

        @Override
        public RecordWriter<NullWritable, Object> getRecordWriter(final TaskAttemptContext tc)
                throws IOException, InterruptedException {

            if (schema == null) {
                String schemaString = getProperties(AvroStorage.class, udfContextSignature)
                        .getProperty(OUTPUT_AVRO_SCHEMA);
                if (schemaString != null) {
                    schema = (new Schema.Parser()).parse(schemaString);
                }
                if (schema == null) {
                    throw new IOException("Null output schema");
                }
            }

            final ColumnFileMetaData meta = new ColumnFileMetaData();

            for (Entry<String, String> e : tc.getConfiguration()) {
                if (e.getKey().startsWith(org.apache.trevni.avro.AvroTrevniOutputFormat.META_PREFIX)) {
                    meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                            e.getValue().getBytes(MetaData.UTF8));
                }
            }

            final Path dir = getOutputPath(tc);
            final FileSystem fs = FileSystem.get(tc.getConfiguration());
            final long blockSize = fs.getDefaultBlockSize();

            if (!fs.mkdirs(dir)) {
                throw new IOException("Failed to create directory: " + dir);
            }

            meta.setCodec("deflate");

            return new AvroRecordWriter(dir, tc.getConfiguration()) {
                private int part = 0;
                private Schema avroRecordWriterSchema;
                private AvroColumnWriter<GenericData.Record> writer;

                private void flush() throws IOException {
                    Integer taskAttemptId = tc.getTaskAttemptID().getTaskID().getId();
                    String partName = String.format("%05d_%03d", taskAttemptId, part++);
                    OutputStream out = fs
                            .create(new Path(dir, "part-" + partName + AvroTrevniOutputFormat.EXT));
                    try {
                        writer.writeTo(out);
                    } finally {
                        out.flush();
                        out.close();
                    }
                }

                @Override
                public void close(final TaskAttemptContext arg0) throws IOException, InterruptedException {
                    flush();
                }

                @Override
                public void write(final NullWritable n, final Object o)
                        throws IOException, InterruptedException {
                    GenericData.Record r = AvroStorageDataConversionUtilities.packIntoAvro((Tuple) o, schema);
                    writer.write(r);
                    if (writer.sizeEstimate() >= blockSize) {
                        flush();
                        writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
                    }
                }

                @Override
                public void prepareToWrite(Schema s) throws IOException {
                    avroRecordWriterSchema = s;
                    writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
                }
            };
        }
    }

    return new TrevniStorageOutputFormat(schema);
}
From source file:org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSparkReducer.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
public void call(Tuple2<Long, Iterable<Writable>> arg0) throws Exception {
    //prepare grouped partition input
    Long key = arg0._1();
    Iterator<Writable> valueList = arg0._2().iterator();

    //write entire partition to binary block sequence file
    SequenceFile.Writer writer = null;
    try {
        //create sequence file writer
        Configuration job = new Configuration(ConfigurationManager.getCachedJobConf());
        Path path = new Path(_fnameNew + File.separator + key);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
                job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), (short) _replication,
                fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());

        //write individual blocks unordered to output
        while (valueList.hasNext()) {
            PairWritableBlock pair = (PairWritableBlock) valueList.next();
            writer.append(pair.indexes, pair.block);
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}