List of usage examples for org.apache.hadoop.fs.FileSystem getDefaultBlockSize
public long getDefaultBlockSize(Path f)
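This overload returns the default block size the file system owning the given path would use for newly created files. Before the project examples below, here is a minimal, self-contained sketch of calling it; the path and configuration values are illustrative only, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DefaultBlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path target = new Path("/tmp/example.dat"); // illustrative path
        // Resolve the FileSystem that owns this path (HDFS, local, etc.).
        FileSystem fs = target.getFileSystem(conf);
        // Ask for the block size the file system would use for new files at this path.
        long blockSize = fs.getDefaultBlockSize(target);
        System.out.println("Default block size (bytes): " + blockSize);
    }
}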
From source file:org.apache.nifi.processors.hadoop.PutHDFS.java
License:Apache License
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info("transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn("penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {
                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }
                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break; // rename was successful
                        }
                        Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file " + tempCopyFile
                                + " to its final filename");
                    }
                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);
            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }

            return null;
        }
    });
}
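The part of this processor that concerns getDefaultBlockSize is the fallback: an explicitly configured block size property wins, and hdfs.getDefaultBlockSize(configuredRootDirPath) supplies the value only when the property is unset. A minimal standalone sketch of that same fallback outside of NiFi (the configuredBlockSize parameter and the /tmp path are illustrative assumptions, not NiFi API):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeFallback {
    // Prefer the user-configured block size; otherwise fall back to the file system default for the directory.
    static long resolveBlockSize(FileSystem fs, Path dir, Long configuredBlockSize) {
        return configuredBlockSize != null ? configuredBlockSize.longValue() : fs.getDefaultBlockSize(dir);
    }

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dir = new Path("/tmp"); // illustrative directory
        System.out.println(resolveBlockSize(fs, dir, null));       // file system default
        System.out.println(resolveBlockSize(fs, dir, 256L << 20)); // explicit 256 MB
    }
}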
From source file:org.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/**
 * @param configuration Hadoop configuration
 * @param schema the schema of the data
 * @param file the file to write to
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @throws IOException if the file can not be created
 */
public ParquetFileWriter(Configuration configuration, MessageType schema, Path file, Mode mode,
        long rowGroupSize, int maxPaddingSize) throws IOException {
    TypeUtil.checkValidWriteSchema(schema);
    this.schema = schema;
    FileSystem fs = file.getFileSystem(configuration);
    boolean overwriteFlag = (mode == Mode.OVERWRITE);

    if (supportsBlockSize(fs)) {
        // use the default block size, unless row group size is larger
        long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), rowGroupSize);

        this.alignment = PaddingAlignment.get(dfsBlockSize, rowGroupSize, maxPaddingSize);
        this.out = fs.create(file, overwriteFlag, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file), dfsBlockSize);
    } else {
        this.alignment = NoAlignment.get(rowGroupSize);
        this.out = fs.create(file, overwriteFlag);
    }
}
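The design choice worth noting: Parquet rounds the file's block size up to at least the row group size, so a row group never has to straddle two HDFS blocks. A hedged sketch of just that sizing decision, independent of the Parquet internals above (the BUFFER_SIZE constant and the method name are illustrative assumptions):

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RowGroupAlignedCreate {
    private static final int BUFFER_SIZE = 8 * 1024 * 1024; // illustrative buffer size

    // Create a file whose block size is at least as large as one row group.
    static FSDataOutputStream create(FileSystem fs, Path file, long rowGroupSize, boolean overwrite) throws IOException {
        long blockSize = Math.max(fs.getDefaultBlockSize(file), rowGroupSize);
        return fs.create(file, overwrite, BUFFER_SIZE, fs.getDefaultReplication(file), blockSize);
    }
}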
From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java
License:Apache License
public static long getDefaultBlockSize(FileSystem fs, Path path) {
    return fs.getDefaultBlockSize(path);
}
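This shim simply forwards to the path-aware FileSystem overload so the rest of Pig does not call FileSystem directly. A hypothetical call site (the input path is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;

public class ShimUsage {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path input = new Path("/user/pig/input"); // illustrative path
        FileSystem fs = input.getFileSystem(conf);
        // Go through the shim rather than calling fs.getDefaultBlockSize(input) directly.
        long blockSize = HadoopShims.getDefaultBlockSize(fs, input);
        System.out.println("block size = " + blockSize);
    }
}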
From source file:org.mrgeo.hdfs.output.image.HdfsMrsImagePyramidOutputFormatProvider.java
License:Apache License
private void setupSingleOutput(final Job job, final String outputWithZoom) throws IOException {
    job.setOutputFormatClass(HdfsMrsPyramidOutputFormat.class);
    HdfsMrsPyramidOutputFormat.setOutputInfo(job, outputWithZoom);

    final Configuration conf = job.getConfiguration();
    final FileSystem fs = HadoopFileUtils.getFileSystem(conf);

    // Set up partitioner
    final int tilesize = context.getTilesize();
    final int zoom = context.getZoomlevel();
    final Bounds bounds = context.getBounds();
    final LongRectangle tileBounds = TMSUtils.boundsToTile(bounds.getTMSBounds(), zoom, tilesize).toLongRectangle();

    final int increment = conf.getInt(TileIdPartitioner.INCREMENT_KEY, -1);
    if (increment != -1) {
        // if increment is provided, use it to setup the partitioner
        splitFileTmp = TileIdPartitioner.setup(job, new ImageSplitGenerator(tileBounds.getMinX(), tileBounds.getMinY(),
                tileBounds.getMaxX(), tileBounds.getMaxY(), zoom, increment));
    } else if (!context.isCalculatePartitions()) {
        // can't calculate partitions on size, just use increment of 1 (1 row per partition)
        splitFileTmp = TileIdPartitioner.setup(job, new ImageSplitGenerator(tileBounds.getMinX(), tileBounds.getMinY(),
                tileBounds.getMaxX(), tileBounds.getMaxY(), zoom, 1));
    } else {
        final int bands = context.getBands();
        final int tiletype = context.getTiletype();
        final int tileSizeBytes = tilesize * tilesize * bands * RasterUtils.getElementSize(tiletype);

        // if increment is not provided, set up the partitioner using max partitions
        final String strMaxPartitions = conf.get(TileIdPartitioner.MAX_PARTITIONS_KEY);
        if (strMaxPartitions != null) {
            // We know the max partitions conf setting exists, let's go read it. The
            // 1000 hard-coded default value is never used.
            final int maxPartitions = conf.getInt(TileIdPartitioner.MAX_PARTITIONS_KEY, 1000);
            splitFileTmp = TileIdPartitioner.setup(job, new ImageSplitGenerator(tileBounds, zoom, tileSizeBytes,
                    fs.getDefaultBlockSize(new Path("/")), maxPartitions));
        } else {
            splitFileTmp = TileIdPartitioner.setup(job, new ImageSplitGenerator(tileBounds, zoom, tileSizeBytes,
                    fs.getDefaultBlockSize(new Path("/"))));
        }
    }
}
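Here getDefaultBlockSize is queried for the file system root, new Path("/"), rather than for the output file, since no output exists yet; the block size is then handed to the split generator so partitions can be sized relative to one HDFS block. A hedged sketch of that kind of sizing arithmetic (the raster parameters and the tiles-per-block calculation are illustrative, not MrGeo's actual partitioning math):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TilesPerBlockEstimate {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Query the default block size for the root path, as above, before any output exists.
        long blockSize = fs.getDefaultBlockSize(new Path("/"));

        int tilesize = 512, bands = 1, bytesPerElement = 4; // illustrative raster parameters
        long tileSizeBytes = (long) tilesize * tilesize * bands * bytesPerElement;

        // Rough estimate of how many tiles fit in one HDFS block; a split generator
        // could use a figure like this to keep each partition near one block of data.
        long tilesPerBlock = Math.max(1, blockSize / tileSizeBytes);
        System.out.println("tiles per block ~= " + tilesPerBlock);
    }
}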