List of usage examples for org.apache.hadoop.fs.FileSystem.getDefaultBlockSize
@Deprecated public long getDefaultBlockSize()
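Note: the no-argument form above is deprecated. Current Hadoop releases prefer the getDefaultBlockSize(Path) overload, which resolves the default block size of the filesystem that actually stores the given path. Below is a minimal, self-contained sketch of both calls (the path is illustrative, not taken from the examples that follow):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DefaultBlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Deprecated form: default block size of this FileSystem instance
        @SuppressWarnings("deprecation")
        long defaultSize = fs.getDefaultBlockSize();

        // Preferred form: default block size for the filesystem holding this path
        Path p = new Path("/tmp/example.seq"); // illustrative path
        long pathSize = fs.getDefaultBlockSize(p);

        System.out.println("default=" + defaultSize + ", for path=" + pathSize);
    }
}

The examples below all use the deprecated no-argument form, typically to choose a block size when constructing a SequenceFile.Writer or creating a file.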
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @param replication
 * @throws IOException
 * @throws DMLUnsupportedOperationException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
protected void writeBinaryBlockMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen,
        int brlen, int bclen, int replication)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    boolean sparse = src.isInSparseFormat();
    FileSystem fs = FileSystem.get(job);

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    // 1) create sequence file writer, with right replication factor
    // (config via 'dfs.replication' not possible since sequence file internally calls fs.getDefaultReplication())
    SequenceFile.Writer writer = null;
    if (replication > 0) { // if replication specified (otherwise default)
        // copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
                job.getInt("io.file.buffer.size", 4096), (short) replication, fs.getDefaultBlockSize(), null,
                new SequenceFile.Metadata());
    } else {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    }

    try {
        // 2) bound check for src block
        if (src.getNumRows() > rlen || src.getNumColumns() > clen) {
            throw new IOException("Matrix block [1:" + src.getNumRows() + ",1:" + src.getNumColumns() + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        // 3) reblock and write
        MatrixIndexes indexes = new MatrixIndexes();

        if (rlen <= brlen && clen <= bclen) { // opt for single block
            // directly write single block
            indexes.setIndexes(1, 1);
            writer.append(indexes, src);
        } else { // general case
            // initialize blocks for reuse (at most 4 different blocks required)
            MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse,
                    src.getNonZeros());

            // create and write subblocks of matrix
            for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++)
                for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
                    int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen
                            : src.getNumRows() - blockRow * brlen;
                    int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen
                            : src.getNumColumns() - blockCol * bclen;

                    int row_offset = blockRow * brlen;
                    int col_offset = blockCol * bclen;

                    // get reuse matrix block
                    MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                    // copy submatrix to block
                    src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset,
                            col_offset + maxCol - 1, block);

                    // append block to sequence file
                    indexes.setIndexes(blockRow + 1, blockCol + 1);
                    writer.append(indexes, block);

                    // reset block for later reuse
                    block.reset();
                }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @param replication
 * @throws IOException
 * @throws DMLUnsupportedOperationException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
protected void writeDiagBinaryBlockMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen,
        int brlen, int bclen, int replication)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    boolean sparse = src.isInSparseFormat();
    FileSystem fs = FileSystem.get(job);

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    // 1) create sequence file writer, with right replication factor
    // (config via 'dfs.replication' not possible since sequence file internally calls fs.getDefaultReplication())
    SequenceFile.Writer writer = null;
    if (replication > 0) { // if replication specified (otherwise default)
        // copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
                job.getInt("io.file.buffer.size", 4096), (short) replication, fs.getDefaultBlockSize(), null,
                new SequenceFile.Metadata());
    } else {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    }

    try {
        // 2) bound check for src block
        if (src.getNumRows() > rlen || src.getNumColumns() > clen) {
            throw new IOException("Matrix block [1:" + src.getNumRows() + ",1:" + src.getNumColumns() + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        // 3) reblock and write
        MatrixIndexes indexes = new MatrixIndexes();

        if (rlen <= brlen && clen <= bclen) { // opt for single block
            // directly write single block
            indexes.setIndexes(1, 1);
            writer.append(indexes, src);
        } else { // general case
            // initialize blocks for reuse (at most 4 different blocks required)
            MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse,
                    src.getNonZeros());
            MatrixBlock emptyBlock = new MatrixBlock();

            // create and write subblocks of matrix
            for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++)
                for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
                    int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen
                            : src.getNumRows() - blockRow * brlen;
                    int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen
                            : src.getNumColumns() - blockCol * bclen;
                    MatrixBlock block = null;

                    if (blockRow == blockCol) { // block on diagonal
                        int row_offset = blockRow * brlen;
                        int col_offset = blockCol * bclen;

                        // get reuse matrix block
                        block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                        // copy submatrix to block
                        src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset,
                                col_offset + maxCol - 1, block);
                    } else { // empty block (not on diagonal)
                        block = emptyBlock;
                        block.reset(maxRow, maxCol);
                    }

                    // append block to sequence file
                    indexes.setIndexes(blockRow + 1, blockCol + 1);
                    writer.append(indexes, block);

                    // reset block for later reuse
                    if (blockRow != blockCol)
                        block.reset();
                }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License:Apache License
private static long getBlockSize(EnumSet<FileAttribute> fileAttributes, FileStatus sourceFile,
        FileSystem targetFS) {
    return fileAttributes.contains(FileAttribute.BLOCKSIZE) ? sourceFile.getBlockSize()
            : targetFS.getDefaultBlockSize();
}
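This helper keeps the source file's block size when BLOCKSIZE is among the attributes to preserve, and otherwise falls back to the target filesystem's default. Below is a hypothetical, self-contained sketch of the same preserve-or-default pattern (the class name, paths, and boolean flag are illustrative; DistCp itself drives this through the FileAttribute enum shown above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PreserveBlockSizeSketch {
    // Same choice as getBlockSize above, with a plain boolean instead of the enum
    static long resolveBlockSize(boolean preserve, FileStatus source, FileSystem targetFS, Path target) {
        return preserve ? source.getBlockSize() : targetFS.getDefaultBlockSize(target);
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/tmp/src.txt"); // illustrative paths
        Path dst = new Path("/tmp/dst.txt");

        FileStatus sourceStatus = fs.getFileStatus(src);
        long blockSize = resolveBlockSize(false, sourceStatus, fs, dst);

        // create the target file with the resolved block size
        try (FSDataOutputStream out = fs.create(dst, true,
                conf.getInt("io.file.buffer.size", 4096),
                fs.getDefaultReplication(dst), blockSize)) {
            out.writeBytes("copied payload would go here");
        }
    }
}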
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        Message msg = new Message("generating test data".getBytes());
        AuditUtil.attachHeaders(msg, currentTimestamp);
        byte[] encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        // Generate a msg with a different timestamp. Default window period is 60 sec.
        AuditUtil.attachHeaders(msg, nextMinuteTimeStamp);
        encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.flush();
        compressedOut.close();
        pathList.add(qualifiedPath);
        ++nFiles;
        FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
        System.out.println(fileStatus.getBlockSize());
        System.out.println(fileStatus.getReplication());
    } finally {
        if (compressedOut != null) // guard: stream creation may have failed
            compressedOut.close();
        IOUtils.cleanup(null, outputStream);
        CodecPool.returnCompressor(gzipCompressor);
    }
}
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfigurationForCluster());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        compressedOut.write(new byte[FILE_SIZE]);
        compressedOut.write("\n".getBytes());
        compressedOut.flush();
        //outputStream.write(new byte[FILE_SIZE]);
        pathList.add(qualifiedPath);
    } finally {
        if (compressedOut != null) // guard: stream creation may have failed
            compressedOut.close();
        IOUtils.cleanup(null, outputStream);
        CodecPool.returnCompressor(gzipCompressor);
    }
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass,
        Class valClass, CompressionType compressionType) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), null,
            new Metadata());
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param progress The Progressable object to track progress.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass,
        Class valClass, CompressionType compressionType, Progressable progress) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, new DefaultCodec(),
            progress, new Metadata());
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass,
        Class valClass, CompressionType compressionType, CompressionCodec codec) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, codec, null,
            new Metadata());
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param progress The Progressable object to track progress.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass,
        Class valClass, CompressionType compressionType, CompressionCodec codec, Progressable progress,
        Metadata metadata) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, codec, progress, metadata);
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
public static void repartitionMapReduce(Path inFile, Path outPath, CellInfo[] cellInfos,
        OperationsParams params) throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");

    FileSystem outFs = outPath.getFileSystem(params);

    // Calculate number of partitions in output file
    // Copy blocksize from source file if it's globally indexed
    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    if (cellInfos == null) {
        if (sindex.equals("grid")) {
            Rectangle input_mbr = FileMBR.fileMBR(inFile, params);
            long inFileSize = FileMBR.sizeOfLastProcessedFile;
            int num_partitions = calculateNumberOfPartitions(new Configuration(), inFileSize, outFs, outPath,
                    blockSize);
            GridInfo gridInfo = new GridInfo(input_mbr.x1, input_mbr.y1, input_mbr.x2, input_mbr.y2);
            gridInfo.calculateCellDimensions(num_partitions);
            cellInfos = gridInfo.getAllCells();
        } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
                || sindex.equals("str+")) {
            // Pack in rectangles using an RTree
            cellInfos = packInRectangles(inFile, outPath, params);
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }
    }

    JobConf job = new JobConf(params, Repartition.class);
    job.setJobName("Repartition");

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}