Example usage for org.apache.hadoop.io.compress CompressionCodec getDefaultExtension

Introduction

This page collects example usages of the org.apache.hadoop.io.compress.CompressionCodec method getDefaultExtension().

Prototype

String getDefaultExtension();

Document

Get the default filename extension for this kind of compression.
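
For orientation, here is a minimal, self-contained sketch (assuming Hadoop's bundled GzipCodec; the class name is only illustrative) that shows what the method returns:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Instantiate a codec the same way most of the examples below do.
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        // For GzipCodec this prints ".gz" (note the leading dot).
        System.out.println(codec.getDefaultExtension());
    }
}

The examples that follow either append this extension to output file names before writing compressed data, or strip it from input file names after decompressing.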

Usage

From source file: org.apache.hawq.pxf.plugins.hdfs.SequenceFileAccessor.java

License: Apache License

private String updateFileExtension(String fileName, CompressionCodec codec) {

    if (codec != null) {
        fileName += codec.getDefaultExtension();
    }
    LOG.debug("File name for write: " + fileName);
    return fileName;
}

From source file: org.apache.jena.grande.mapreduce.io.NQuadsOutputFormat.java

License: Apache License

@Override
public RecordWriter<NullWritable, QuadWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new QuadRecordWriter(new OutputStreamWriter(fileOut));
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new QuadRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)));
    }
}

From source file: org.apache.jena.hadoop.rdf.io.output.AbstractNodeOutputFormat.java

License: Apache License

@Override
public RecordWriter<NodeWritable, TValue> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration config = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = this.getFileExtension();
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, config);
        extension += codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    LOG.info("Writing output to file " + file);
    FileSystem fs = file.getFileSystem(config);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(fileOut), config);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config);
    }
}

From source file: org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormat.java

License: Apache License

@Override
public RecordWriter<TKey, T> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration config = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;

    // Build the output file path
    String extension = this.getFileExtension();
    if (isCompressed) {
        // Add compression extension if applicable
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, config);
        extension += codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, extension);
    LOG.info("Writing output to file " + file);

    // Open the file appropriately and create a record writer for it
    FileSystem fs = file.getFileSystem(config);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(fileOut), config, file);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config, file);
    }
}

From source file: org.apache.lens.lib.query.LensFileOutputFormat.java

License: Apache License

/**
 * Creates the record writer.
 *
 * @param conf         the configuration
 * @param tmpWorkPath  the temporary work path
 * @param progress     the progress reporter
 * @param isCompressed whether the output is compressed
 * @param extn         the file extension
 * @param encoding     the output encoding
 * @return the lens row writer
 * @throws IOException if an I/O error occurs
 */
public static LensRowWriter createRecordWriter(Configuration conf, Path tmpWorkPath, Progressable progress,
        boolean isCompressed, String extn, String encoding) throws IOException {
    Path file;
    if (extn != null) {
        file = new Path(tmpWorkPath + extn);
    } else {
        file = tmpWorkPath;
    }
    if (!isCompressed) {
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LensRowWriter(fileOut, encoding, file, extn);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(conf);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
        // build the filename including the extension
        String codecExtn = codec.getDefaultExtension();
        file = new Path(file + codecExtn);
        FileSystem fs = file.getFileSystem(conf);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LensRowWriter(new DataOutputStream(codec.createOutputStream(fileOut)), encoding, file,
                extn + codecExtn);
    }
}

From source file: org.apache.nifi.processors.hadoop.FetchHDFS.java

License: Apache License

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile)
            .getValue();

    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType
                    .valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;

            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }

            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);

                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }

                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}",
                        new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(),
                        stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error(
                        "Failed to retrieve content from {} for {} due to {}; routing to comms.failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }

            return null;
        }
    });

}

From source file: org.apache.nifi.processors.hadoop.GetHDFS.java

License: Apache License

protected void processBatchOfFiles(final List<Path> files, final ProcessContext context,
        final ProcessSession session) {
    // process the batch of files
    InputStream stream = null;
    CompressionCodec codec = null;
    Configuration conf = getConfiguration();
    FileSystem hdfs = getFileSystem();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
            : conf.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);
    final Path rootDir = new Path(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());

    final CompressionType compressionType = CompressionType
            .valueOf(context.getProperty(COMPRESSION_CODEC).toString());
    final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
    if (inferCompressionCodec || compressionType != CompressionType.NONE) {
        codec = getCompressionCodec(context, getConfiguration());
    }
    final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
    for (final Path file : files) {
        try {
            if (!getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.exists(file))) {
                continue; // if file is no longer there then move on
            }
            final String originalFilename = file.getName();
            final String relativePath = getPathDifference(rootDir, file);

            stream = getUserGroupInformation()
                    .doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> hdfs.open(file, bufferSize));

            final String outputFilename;
            // Check if we should infer compression codec
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(file);
            }
            // Check if compression codec is defined (inferred or otherwise)
            if (codec != null) {
                stream = codec.createInputStream(stream);
                outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
            } else {
                outputFilename = originalFilename;
            }

            FlowFile flowFile = session.create();

            final StopWatch stopWatch = new StopWatch(true);
            flowFile = session.importFrom(stream, flowFile);
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);

            flowFile = session.putAttribute(flowFile, CoreAttributes.PATH.key(), relativePath);
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

            if (!keepSourceFiles && !getUserGroupInformation()
                    .doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.delete(file, false))) {
                getLogger().warn("Could not remove {} from HDFS. Not ingesting this file ...",
                        new Object[] { file });
                session.remove(flowFile);
                continue;
            }

            session.getProvenanceReporter().receive(flowFile, file.toString());
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("retrieved {} from HDFS {} in {} milliseconds at a rate of {}",
                    new Object[] { flowFile, file, millis, dataRate });
            session.commit();
        } catch (final Throwable t) {
            getLogger().error("Error retrieving file {} from HDFS due to {}", new Object[] { file, t });
            session.rollback();
            context.yield();
        } finally {
            IOUtils.closeQuietly(stream);
            stream = null;
        }
    }
}

From source file: org.apache.nifi.processors.hadoop.PutHDFS.java

License: Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile)
                        .getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(
                                configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info(
                                "transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn(
                                "penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {

                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }

                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break;// rename was successful
                        }
                        Thread.sleep(200L);// try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file "
                                + tempCopyFile + " to its final filename");
                    }

                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);

            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }

            return null;
        }
    });
}

From source file: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat.java

License: Apache License

@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new PigLineRecordWriter(fileOut, fieldDel);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new PigLineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)), fieldDel);
    }
}

From source file: org.apache.sqoop.connector.hdfs.HdfsLoader.java

License: Apache License

private static String getExtension(ToJobConfiguration toJobConf, CompressionCodec codec) {
    if (toJobConf.toJobConfig.outputFormat == ToFormat.SEQUENCE_FILE)
        return ".seq";
    if (codec == null)
        return ".txt";
    return codec.getDefaultExtension();
}