List of usage examples for org.apache.hadoop.io.compress.CompressionCodec.getDefaultExtension()
String getDefaultExtension();
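getDefaultExtension() returns the filename extension conventionally used for the codec's output, including the leading dot (".gz" for GzipCodec). Before the real-world examples below, here is a minimal standalone sketch; the class name is illustrative and not taken from any of the sources, but it instantiates a codec the same way the examples do:

// Minimal sketch: instantiate a codec and ask for its default extension.
// GzipCodec's getDefaultExtension() returns ".gz" (leading dot included).
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        System.out.println(codec.getDefaultExtension()); // prints ".gz"
    }
}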
From source file:org.apache.hawq.pxf.plugins.hdfs.SequenceFileAccessor.java
License:Apache License
private String updateFileExtension(String fileName, CompressionCodec codec) {
    // Append the codec's default extension (e.g. ".gz") only when a codec is configured
    if (codec != null) {
        fileName += codec.getDefaultExtension();
    }
    LOG.debug("File name for write: " + fileName);
    return fileName;
}
From source file:org.apache.jena.grande.mapreduce.io.NQuadsOutputFormat.java
License:Apache License
@Override
public RecordWriter<NullWritable, QuadWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new QuadRecordWriter(new OutputStreamWriter(fileOut));
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new QuadRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)));
    }
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeOutputFormat.java
License:Apache License
@Override
public RecordWriter<NodeWritable, TValue> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration config = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = this.getFileExtension();
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, config);
        extension += codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    LOG.info("Writing output to file " + file);
    FileSystem fs = file.getFileSystem(config);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(fileOut), config);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat.java
License:Apache License
@Override
public RecordWriter<TKey, T> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration config = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;

    // Build the output file path
    String extension = this.getFileExtension();
    if (isCompressed) {
        // Add compression extension if applicable
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, config);
        extension += codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    LOG.info("Writing output to file " + file);

    // Open the file appropriately and create a record writer for it
    FileSystem fs = file.getFileSystem(config);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(fileOut), config, file);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config, file);
    }
}
From source file:org.apache.lens.lib.query.LensFileOutputFormat.java
License:Apache License
/**
 * Creates the record writer.
 *
 * @param conf         the conf
 * @param tmpWorkPath  the tmp work path
 * @param progress     the progress
 * @param isCompressed the is compressed
 * @param extn         the extn
 * @param encoding     the encoding
 * @return the lens row writer
 * @throws IOException Signals that an I/O exception has occurred.
 */
public static LensRowWriter createRecordWriter(Configuration conf, Path tmpWorkPath, Progressable progress,
        boolean isCompressed, String extn, String encoding) throws IOException {
    Path file;
    if (extn != null) {
        file = new Path(tmpWorkPath + extn);
    } else {
        file = tmpWorkPath;
    }
    if (!isCompressed) {
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LensRowWriter(fileOut, encoding, file, extn);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(conf);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
        // build the filename including the extension
        String codecExtn = codec.getDefaultExtension();
        file = new Path(file + codecExtn);
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LensRowWriter(new DataOutputStream(codec.createOutputStream(fileOut)), encoding, file,
                extn + codecExtn);
    }
}
From source file:org.apache.nifi.processors.hadoop.FetchHDFS.java
License:Apache License
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile).getValue();

    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType
                    .valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;

            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }

            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);

                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }

                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);
                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}",
                        new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(),
                        stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to comms.failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }
            return null;
        }
    });
}
From source file:org.apache.nifi.processors.hadoop.GetHDFS.java
License:Apache License
protected void processBatchOfFiles(final List<Path> files, final ProcessContext context,
        final ProcessSession session) {
    // process the batch of files
    InputStream stream = null;
    CompressionCodec codec = null;
    Configuration conf = getConfiguration();
    FileSystem hdfs = getFileSystem();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
            : conf.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);
    final Path rootDir = new Path(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());

    final CompressionType compressionType = CompressionType
            .valueOf(context.getProperty(COMPRESSION_CODEC).toString());
    final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
    if (inferCompressionCodec || compressionType != CompressionType.NONE) {
        codec = getCompressionCodec(context, getConfiguration());
    }
    final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);

    for (final Path file : files) {
        try {
            if (!getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.exists(file))) {
                continue; // if file is no longer there then move on
            }
            final String originalFilename = file.getName();
            final String relativePath = getPathDifference(rootDir, file);

            stream = getUserGroupInformation()
                    .doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> hdfs.open(file, bufferSize));

            final String outputFilename;
            // Check if we should infer compression codec
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(file);
            }
            // Check if compression codec is defined (inferred or otherwise)
            if (codec != null) {
                stream = codec.createInputStream(stream);
                outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
            } else {
                outputFilename = originalFilename;
            }

            FlowFile flowFile = session.create();
            final StopWatch stopWatch = new StopWatch(true);
            flowFile = session.importFrom(stream, flowFile);
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            flowFile = session.putAttribute(flowFile, CoreAttributes.PATH.key(), relativePath);
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

            if (!keepSourceFiles && !getUserGroupInformation()
                    .doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.delete(file, false))) {
                getLogger().warn("Could not remove {} from HDFS. Not ingesting this file ...",
                        new Object[] { file });
                session.remove(flowFile);
                continue;
            }

            session.getProvenanceReporter().receive(flowFile, file.toString());
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("retrieved {} from HDFS {} in {} milliseconds at a rate of {}",
                    new Object[] { flowFile, file, millis, dataRate });
            session.commit();
        } catch (final Throwable t) {
            getLogger().error("Error retrieving file {} from HDFS due to {}", new Object[] { file, t });
            session.rollback();
            context.yield();
        } finally {
            IOUtils.closeQuietly(stream);
            stream = null;
        }
    }
}
From source file:org.apache.nifi.processors.hadoop.PutHDFS.java
License:Apache License
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile)
                        .getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(
                                configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info("transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn(
                                "penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {
                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }
                });
                stopWatch.stop();

                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break; // rename was successful
                        }
                        Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file "
                                + tempCopyFile + " to its final filename");
                    }
                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
                session.transfer(putFlowFile, REL_SUCCESS);
            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }
            return null;
        }
    });
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat.java
License:Apache License
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new PigLineRecordWriter(fileOut, fieldDel);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new PigLineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)), fieldDel);
    }
}
From source file:org.apache.sqoop.connector.hdfs.HdfsLoader.java
License:Apache License
private static String getExtension(ToJobConfiguration toJobConf, CompressionCodec codec) {
    if (toJobConf.toJobConfig.outputFormat == ToFormat.SEQUENCE_FILE) {
        return ".seq";
    }
    if (codec == null) {
        return ".txt";
    }
    return codec.getDefaultExtension();
}
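The examples above use getDefaultExtension() in two directions: appending it when naming compressed output files, and stripping it to recover the original name of a compressed input (as the NiFi processors do). Here is a short sketch of the read-side round trip, assuming a gzip-named file; the class name and path are illustrative, not from any of the sources:

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecRoundTripExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        Path file = new Path("/data/part-00000.gz"); // illustrative path
        // getCodec resolves the codec from the file's extension (".gz" -> GzipCodec)
        CompressionCodec codec = factory.getCodec(file);
        if (codec != null) {
            // Strip the codec's default extension to recover the uncompressed name
            String plainName = StringUtils.removeEnd(file.getName(), codec.getDefaultExtension());
            System.out.println(plainName); // prints "part-00000"
        }
    }
}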