List of usage examples for org.apache.hadoop.fs FileSystem getDefaultReplication
public short getDefaultReplication(Path path)
From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/** * @param configuration Hadoop configuration * @param schema the schema of the data/* w w w . j a va2s .c o m*/ * @param file the file to write to * @param mode file creation mode * @param rowGroupSize the row group size * @throws IOException if the file can not be created */ public ParquetFileWriter(Configuration configuration, MessageType schema, Path file, Mode mode, long rowGroupSize, int maxPaddingSize) throws IOException { TypeUtil.checkValidWriteSchema(schema); this.schema = schema; FileSystem fs = file.getFileSystem(configuration); boolean overwriteFlag = (mode == Mode.OVERWRITE); if (supportsBlockSize(fs)) { // use the default block size, unless row group size is larger long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), rowGroupSize); this.alignment = PaddingAlignment.get(dfsBlockSize, rowGroupSize, maxPaddingSize); this.out = fs.create(file, overwriteFlag, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file), dfsBlockSize); } else { this.alignment = NoAlignment.get(rowGroupSize); this.out = fs.create(file, overwriteFlag); } }
From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/** * FOR TESTING ONLY.// w w w .j a v a2 s . c o m * * @param configuration Hadoop configuration * @param schema the schema of the data * @param file the file to write to * @param rowAndBlockSize the row group size * @throws IOException if the file can not be created */ ParquetFileWriter(Configuration configuration, MessageType schema, Path file, long rowAndBlockSize, int maxPaddingSize) throws IOException { FileSystem fs = file.getFileSystem(configuration); this.schema = schema; this.alignment = PaddingAlignment.get(rowAndBlockSize, rowAndBlockSize, maxPaddingSize); this.out = fs.create(file, true, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file), rowAndBlockSize); }
From source file:com.thinkbiganalytics.nifi.v2.hdfs.AbstractHadoopProcessor.java
License:Apache License
/** * Reset Hadoop Configuration and FileSystem based on the supplied configuration resources. * * @param configResources for configuration * @param dir the target directory * @param context for context, which gives access to the principal * @return An HdfsResources object//from w w w . j a v a 2s. c om * @throws IOException if unable to access HDFS */ HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context) throws IOException { // org.apache.hadoop.conf.Configuration saves its current thread context class loader to use for threads that it creates // later to do I/O. We need this class loader to be the NarClassLoader instead of the magical // NarThreadContextClassLoader. ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); try { Configuration config = getConfigurationFromResources(configResources); // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout checkHdfsUriForTimeout(config); // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor without a complete // restart String disableCacheName = String.format("fs.%s.impl.disable.cache", FileSystem.getDefaultUri(config).getScheme()); config.set(disableCacheName, "true"); // If kerberos is enabled, create the file system as the kerberos principal // -- use RESOURCE_LOCK to guarantee UserGroupInformation is accessed by only a single thread at at time FileSystem fs = null; UserGroupInformation ugi = null; synchronized (RESOURCES_LOCK) { if (config.get("hadoop.security.authentication").equalsIgnoreCase("kerberos")) { String principal = context.getProperty(kerberosPrincipal).getValue(); String keyTab = context.getProperty(kerberosKeytab).getValue(); UserGroupInformation.setConfiguration(config); ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTab); modifyConfig(context, 
config); fs = getFileSystemAsUser(config, ugi); lastKerberosReloginTime = System.currentTimeMillis() / 1000; } else { config.set("ipc.client.fallback-to-simple-auth-allowed", "true"); config.set("hadoop.security.authentication", "simple"); modifyConfig(context, config); fs = getFileSystem(config); } } getLog().info( "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}", new Object[] { fs.getWorkingDirectory(), fs.getDefaultBlockSize(new Path(dir)), fs.getDefaultReplication(new Path(dir)), config.toString() }); return new HdfsResources(config, fs, ugi); } finally { Thread.currentThread().setContextClassLoader(savedClassLoader); } }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Returns the default replication that {@code fs} reports for {@code path}.
 *
 * @param fs the file system to query
 * @param path the path passed to {@code FileSystem.getDefaultReplication}
 * @return the default replication, boxed as a {@link Short}
 */
public static Short getDefaultReplication(FileSystem fs, Path path) {
    final short replication = fs.getDefaultReplication(path);
    return replication;
}
From source file:gobblin.util.SerializationUtils.java
License:Apache License
/**
 * Serialize a {@link State} instance to a file.
 *
 * <p>Delegates to the four-argument overload, passing the default replication that {@code fs}
 * reports for {@code jobStateFilePath}.
 *
 * @param fs the {@link FileSystem} instance for creating the file
 * @param jobStateFilePath the path to the file
 * @param state the {@link State} to serialize
 * @param <T> the {@link State} object type
 * @throws IOException if it fails to serialize the {@link State} instance
 */
public static <T extends State> void serializeState(FileSystem fs, Path jobStateFilePath, T state)
        throws IOException {
    final short defaultReplication = fs.getDefaultReplication(jobStateFilePath);
    serializeState(fs, jobStateFilePath, state, defaultReplication);
}
From source file:org.apache.carbondata.core.datastorage.store.impl.FileFactory.java
License:Apache License
/**
 * Opens a {@link DataOutputStream} for {@code path}.
 *
 * <p>Backslashes in {@code path} are replaced with forward slashes first. For {@code HDFS} and
 * {@code VIEWFS} the file is created through the Hadoop file system that owns the path, with
 * overwrite enabled and the file system's default replication. Every other file type is written
 * through a buffered local {@link FileOutputStream}; {@code blockSize} is not used in that case.
 *
 * @param path the file to write
 * @param fileType the kind of storage the path lives on
 * @param bufferSize the buffer size for the created stream
 * @param blockSize the block size passed to {@code FileSystem.create} for Hadoop-backed files
 * @return a stream writing to {@code path}
 * @throws IOException if the stream can not be opened
 */
public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        long blockSize) throws IOException {
    final String normalizedPath = path.replace("\\", "/");
    switch (fileType) {
    case HDFS:
    case VIEWFS:
        final Path hadoopPath = new Path(normalizedPath);
        final FileSystem fileSystem = hadoopPath.getFileSystem(configuration);
        final short replication = fileSystem.getDefaultReplication(hadoopPath);
        return fileSystem.create(hadoopPath, true, bufferSize, replication, blockSize);
    case LOCAL:
    default:
        // LOCAL and any unlisted type share the plain local-file path
        final FileOutputStream localFile = new FileOutputStream(normalizedPath);
        return new DataOutputStream(new BufferedOutputStream(localFile, bufferSize));
    }
}
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
/**
 * Opens a {@link DataOutputStream} for {@code path}.
 *
 * <p>Backslashes in {@code path} are replaced with forward slashes first. For {@code HDFS},
 * {@code ALLUXIO} and {@code VIEWFS} the file is created through the Hadoop file system that
 * owns the path, with overwrite enabled and the file system's default replication. Every other
 * file type has its path passed through {@code getUpdatedFilePath} and is then written through
 * a buffered local {@link FileOutputStream}; {@code blockSize} is not used in that case.
 *
 * @param path the file to write
 * @param fileType the kind of storage the path lives on
 * @param bufferSize the buffer size for the created stream
 * @param blockSize the block size passed to {@code FileSystem.create} for Hadoop-backed files
 * @return a stream writing to {@code path}
 * @throws IOException if the stream can not be opened
 */
public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        long blockSize) throws IOException {
    final String normalizedPath = path.replace("\\", "/");
    switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        final Path hadoopPath = new Path(normalizedPath);
        final FileSystem fileSystem = hadoopPath.getFileSystem(configuration);
        final short replication = fileSystem.getDefaultReplication(hadoopPath);
        return fileSystem.create(hadoopPath, true, bufferSize, replication, blockSize);
    case LOCAL:
    default:
        // LOCAL and any unlisted type share the plain local-file path
        final String localPath = getUpdatedFilePath(normalizedPath, fileType);
        final FileOutputStream localFile = new FileOutputStream(localPath);
        return new DataOutputStream(new BufferedOutputStream(localFile, bufferSize));
    }
}
From source file:org.apache.gobblin.data.management.copy.CopyableFile.java
License:Apache License
/** * @return desired replication for destination file. *///from w w w . j a v a 2 s . c o m public short getReplication(FileSystem targetFs) { return getPreserve().preserve(PreserveAttributes.Option.REPLICATION) ? getOrigin().getReplication() : targetFs.getDefaultReplication(this.destination); }
From source file:org.apache.nifi.processors.hadoop.AbstractHadoopProcessor.java
License:Apache License
HdfsResources resetHDFSResources(String configResources, ProcessContext context) throws IOException { Configuration config = new ExtendedConfiguration(getLogger()); config.setClassLoader(Thread.currentThread().getContextClassLoader()); getConfigurationFromResources(config, configResources); // give sub-classes a chance to process configuration preProcessConfiguration(config, context); // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout checkHdfsUriForTimeout(config);/*from w w w .j a v a 2s. c o m*/ // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor without a complete // restart String disableCacheName = String.format("fs.%s.impl.disable.cache", FileSystem.getDefaultUri(config).getScheme()); config.set(disableCacheName, "true"); // If kerberos is enabled, create the file system as the kerberos principal // -- use RESOURCE_LOCK to guarantee UserGroupInformation is accessed by only a single thread at at time FileSystem fs; UserGroupInformation ugi; synchronized (RESOURCES_LOCK) { if (SecurityUtil.isSecurityEnabled(config)) { String principal = context.getProperty(kerberosProperties.getKerberosPrincipal()) .evaluateAttributeExpressions().getValue(); String keyTab = context.getProperty(kerberosProperties.getKerberosKeytab()) .evaluateAttributeExpressions().getValue(); ugi = SecurityUtil.loginKerberos(config, principal, keyTab); fs = getFileSystemAsUser(config, ugi); } else { config.set("ipc.client.fallback-to-simple-auth-allowed", "true"); config.set("hadoop.security.authentication", "simple"); ugi = SecurityUtil.loginSimple(config); fs = getFileSystemAsUser(config, ugi); } } getLogger().debug("resetHDFSResources UGI {}", new Object[] { ugi }); final Path workingDir = fs.getWorkingDirectory(); getLogger().info( "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}", new Object[] { workingDir, 
fs.getDefaultBlockSize(workingDir), fs.getDefaultReplication(workingDir), config.toString() }); return new HdfsResources(config, fs, ugi); }
From source file:org.apache.nifi.processors.hadoop.PutHDFS.java
License:Apache License
@Override public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { final FlowFile flowFile = session.get(); if (flowFile == null) { return;// www .j a v a 2 s. com } final FileSystem hdfs = getFileSystem(); final Configuration configuration = getConfiguration(); final UserGroupInformation ugi = getUserGroupInformation(); if (configuration == null || hdfs == null || ugi == null) { getLogger().error("HDFS not configured properly"); session.transfer(flowFile, REL_FAILURE); context.yield(); return; } ugi.doAs(new PrivilegedAction<Object>() { @Override public Object run() { Path tempDotCopyFile = null; FlowFile putFlowFile = flowFile; try { final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile) .getValue(); final Path configuredRootDirPath = new Path(dirValue); final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue(); final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B); final long blockSize = blockSizeProp != null ? blockSizeProp.longValue() : hdfs.getDefaultBlockSize(configuredRootDirPath); final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B); final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT); final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger(); final short replication = replicationProp != null ? replicationProp.shortValue() : hdfs.getDefaultReplication(configuredRootDirPath); final CompressionCodec codec = getCompressionCodec(context, configuration); final String filename = codec != null ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension() : putFlowFile.getAttribute(CoreAttributes.FILENAME.key()); final Path tempCopyFile = new Path(configuredRootDirPath, "." 
+ filename); final Path copyFile = new Path(configuredRootDirPath, filename); // Create destination directory if it does not exist try { if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) { throw new IOException( configuredRootDirPath.toString() + " already exists and is not a directory"); } } catch (FileNotFoundException fe) { if (!hdfs.mkdirs(configuredRootDirPath)) { throw new IOException(configuredRootDirPath.toString() + " could not be created"); } changeOwner(context, hdfs, configuredRootDirPath, flowFile); } final boolean destinationExists = hdfs.exists(copyFile); // If destination file already exists, resolve that based on processor configuration if (destinationExists) { switch (conflictResponse) { case REPLACE_RESOLUTION: if (hdfs.delete(copyFile, false)) { getLogger().info("deleted {} in order to replace with the contents of {}", new Object[] { copyFile, putFlowFile }); } break; case IGNORE_RESOLUTION: session.transfer(putFlowFile, REL_SUCCESS); getLogger().info( "transferring {} to success because file with same name already exists", new Object[] { putFlowFile }); return null; case FAIL_RESOLUTION: session.transfer(session.penalize(putFlowFile), REL_FAILURE); getLogger().warn( "penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile }); return null; default: break; } } // Write FlowFile to temp file on HDFS final StopWatch stopWatch = new StopWatch(true); session.read(putFlowFile, new InputStreamCallback() { @Override public void process(InputStream in) throws IOException { OutputStream fos = null; Path createdFile = null; try { if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) { fos = hdfs.append(copyFile, bufferSize); } else { fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize); } if (codec != null) { fos = codec.createOutputStream(fos); } createdFile = tempCopyFile; BufferedInputStream bis = new BufferedInputStream(in); 
StreamUtils.copy(bis, fos); bis = null; fos.flush(); } finally { try { if (fos != null) { fos.close(); } } catch (RemoteException re) { // when talking to remote HDFS clusters, we don't notice problems until fos.close() if (createdFile != null) { try { hdfs.delete(createdFile, false); } catch (Throwable ignore) { } } throw re; } catch (Throwable ignore) { } fos = null; } } }); stopWatch.stop(); final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize()); final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS); tempDotCopyFile = tempCopyFile; if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) { boolean renamed = false; for (int i = 0; i < 10; i++) { // try to rename multiple times. if (hdfs.rename(tempCopyFile, copyFile)) { renamed = true; break;// rename was successful } Thread.sleep(200L);// try waiting to let whatever might cause rename failure to resolve } if (!renamed) { hdfs.delete(tempCopyFile, false); throw new ProcessException("Copied file to HDFS but could not rename dot file " + tempCopyFile + " to its final filename"); } changeOwner(context, hdfs, copyFile, flowFile); } getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, copyFile, millis, dataRate }); final String newFilename = copyFile.getName(); final String hdfsPath = copyFile.getParent().toString(); putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename); putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath); final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory()); session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString()); session.transfer(putFlowFile, REL_SUCCESS); } catch (final Throwable t) { if (tempDotCopyFile != null) { try { hdfs.delete(tempDotCopyFile, false); } catch (Exception e) { 
getLogger().error("Unable to remove temporary file {} due to {}", new Object[] { tempDotCopyFile, e }); } } getLogger().error("Failed to write to HDFS due to {}", new Object[] { t }); session.transfer(session.penalize(putFlowFile), REL_FAILURE); context.yield(); } return null; } }); }