Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_CODEC

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_CODEC.

Prototype

String COMPRESS_CODEC

Document

If compression is enabled, the name of the codec class to use for the job outputs.
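
Before the examples below, here is a minimal driver sketch (not taken from any of the sources listed here; the class name, job name, and GzipCodec choice are illustrative) showing how FileOutputFormat.COMPRESS_CODEC is typically set, both through the raw configuration key and through the equivalent FileOutputFormat helper methods:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressCodecSketch {
    public static void configureCompression(Job job) {
        Configuration conf = job.getConfiguration();

        // Enable output compression and record the codec class under COMPRESS_CODEC.
        conf.setBoolean(FileOutputFormat.COMPRESS, true);
        conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

        // Equivalent to the two calls above:
        // FileOutputFormat.setCompressOutput(job, true);
        // FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compress-codec-sketch");
        configureCompression(job);
    }
}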

Usage

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Set the {@link CompressionCodec} to be used to compress job outputs.
 *
 * @param job
 *            the job to modify
 * @param codecClass
 *            the {@link CompressionCodec} to be used to compress the job
 *            outputs
 */
public static void setOutputCompressorClass(Job job, Class<? extends CompressionCodec> codecClass) {
    setCompressOutput(job, true);
    job.getConfiguration().setClass(FileOutputFormat.COMPRESS_CODEC, codecClass, CompressionCodec.class);
}
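
A hypothetical call site for the helper above (the job name and the GzipCodec choice are assumptions for illustration):

// Hypothetical driver snippet: delegate codec selection to setOutputCompressorClass.
// The call both enables compression and stores the codec class name under
// FileOutputFormat.COMPRESS_CODEC in the job configuration.
Job job = Job.getInstance(new Configuration(), "roam-recognition-example");
NewFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);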

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Get the {@link CompressionCodec} for compressing the job outputs.
 *
 * @param job
 *            the {@link Job} to look in
 * @param defaultValue
 *            the {@link CompressionCodec} to return if not set
 * @return the {@link CompressionCodec} to be used to compress the job
 *         outputs
 * @throws IllegalArgumentException
 *             if the class was specified, but not found
 */
public static Class<? extends CompressionCodec> getOutputCompressorClass(JobContext job,
        Class<? extends CompressionCodec> defaultValue) {
    Class<? extends CompressionCodec> codecClass = defaultValue;
    Configuration conf = job.getConfiguration();
    String name = conf.get(FileOutputFormat.COMPRESS_CODEC);
    if (name != null) {
        try {
            codecClass = conf.getClassByName(name).asSubclass(CompressionCodec.class);
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
        }
    }
    return codecClass;
}
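
Reading the setting back is symmetric. A sketch of how the getter might be used (the DefaultCodec fallback and the ReflectionUtils instantiation are assumptions, mirroring the Druid test further down):

// Sketch: resolve the codec class recorded under COMPRESS_CODEC, falling back
// to DefaultCodec, and instantiate it with the job configuration.
Class<? extends CompressionCodec> codecClass =
        NewFileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());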

From source file:com.cloudera.oryx.computation.common.JobStep.java

License:Open Source License

protected final Target compressedTextOutput(Configuration conf, String outputPathKey) {
    // The way this is used, it doesn't seem like we can just set the object in getConf(). Need
    // to set the copy in the MRPipeline directly?
    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
    conf.setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    return To.textFile(Namespaces.toPath(outputPathKey));
}

From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java

License:Apache License

public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    boolean compression = compressionCodec != HiveCompressionCodec.NONE;
    config.setBoolean(COMPRESSRESULT.varname, compression);
    config.setBoolean("mapred.output.compress", compression);
    config.setBoolean(FileOutputFormat.COMPRESS, compression);
    // For DWRF
    config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name());
    config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name());
    // For ORC
    config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name());
    // For RCFile and Text
    if (compressionCodec.getCodec().isPresent()) {
        config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
        config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
    } else {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }
    // For Parquet
    config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
    // For SequenceFile
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}

From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java

License:Apache License

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}

From source file:io.druid.indexer.UtilsCompressionTest.java

License:Apache License

@Before
public void setUp() throws IOException {
    jobConfig = new Configuration();
    mockJobContext = EasyMock.createMock(JobContext.class);
    EasyMock.expect(mockJobContext.getConfiguration()).andReturn(jobConfig).anyTimes();
    EasyMock.replay(mockJobContext);

    jobConfig.setBoolean(FileOutputFormat.COMPRESS, true);
    jobConfig.set(FileOutputFormat.COMPRESS_CODEC, CODEC_CLASS);
    Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(mockJobContext,
            DEFAULT_COMPRESSION_CODEC);
    codec = ReflectionUtils.newInstance(codecClass, jobConfig);

    tmpFile = tmpFolder.newFile(TMP_FILE_NAME + codec.getDefaultExtension());
    tmpPathWithExtension = new Path(tmpFile.getAbsolutePath());
    tmpPathWithoutExtension = new Path(tmpFile.getParent(), TMP_FILE_NAME);
    defaultFileSystem = tmpPathWithoutExtension.getFileSystem(jobConfig);
}

From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java

License:Apache License

public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    boolean compression = compressionCodec != HiveCompressionCodec.NONE;
    config.setBoolean(COMPRESSRESULT.varname, compression);
    config.setBoolean("mapred.output.compress", compression);
    config.setBoolean(FileOutputFormat.COMPRESS, compression);
    // For DWRF
    com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION,
            compressionCodec.getOrcCompressionKind().name());
    // For ORC
    OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name());
    // For RCFile and Text
    if (compressionCodec.getCodec().isPresent()) {
        config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
        config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
    } else {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }
    // For Parquet
    config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
    // For SequenceFile
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}

From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java

License:Apache License

@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
        } else {
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC);
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");

        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    return null;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2.java

License:Apache License

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java

License:Apache License

private static void populateMRSettingsToRetain() {

    // FileInputFormat
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
    mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
    mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);

    // FileOutputFormat
    mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
    mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
    mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
}