List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setCompressOutput
public static void setCompressOutput(Job job, boolean compress)
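Before the collected examples, a minimal self-contained sketch of the typical call pattern. The job name, paths, and codec choice here are illustrative, not taken from any example below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Minimal illustrative driver: a pass-through job whose text output is gzip-compressed.
public class CompressedOutputSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compressed-output-sketch");
        job.setJarByClass(CompressedOutputSketch.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // The call this page documents: turn output compression on (or off)...
        FileOutputFormat.setCompressOutput(job, true);
        // ...and, usually alongside it, pick the codec; GzipCodec yields part-r-*.gz files.
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}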
From source file:nl.utwente.trafficanalyzer.SensorCountPerRoadPerDay.java
License:Apache License
public void run(String inputPath, String outPath) throws Exception {
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJarByClass(SensorCountPerRoadPerDay.class);
    job.setJobName(String.format("%s [%s, %s]", this.getClass().getName(), inputPath, outPath));

    // -- check if output directory already exists; and optionally delete
    String outputAlreadyExistsOption = "exit";
    Path outDir = new Path(outPath);
    if (FileSystem.get(conf).exists(outDir)) {
        if (outputAlreadyExistsOption.equalsIgnoreCase("delete")) {
            FileSystem.get(conf).delete(outDir, true);
        } else {
            System.err.println("Directory " + outPath + " already exists; exiting");
            System.exit(1);
        }
    }

    // ---- Input (Format) Options
    String inputFormat = "text";
    if (inputFormat.equalsIgnoreCase("text")) {
        job.setInputFormatClass(TextInputFormat.class);
    } else if (inputFormat.equalsIgnoreCase("sequence")) { // fixed: was a duplicate "text" check, leaving this branch unreachable
        job.setInputFormatClass(SequenceFileInputFormat.class);
    }
    // Utils.recursivelyAddInputPaths(job, new Path(inputPath));
    FileInputFormat.addInputPath(job, new Path(inputPath));

    // Add files that should be available locally at each mapper
    // Utils.addCacheFiles(job, new String[] { });

    // ---- Mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(MyMapper.KOUT);
    job.setMapOutputValueClass(MyMapper.VOUT);

    // ---- Combiner
    job.setCombinerClass(MyCombiner.class);

    // ---- Partitioner
    // job.setPartitionerClass(MyPartitioner.class);

    // ---- Reducer
    // set the number of reducers to influence the number of output files
    job.setNumReduceTasks(1);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(MyReducer.KOUT);
    job.setOutputValueClass(MyReducer.VOUT);

    // ---- Output Options
    String outputFormat = "text";
    if (outputFormat.equalsIgnoreCase("sequence")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
    } else if (outputFormat.equalsIgnoreCase("text")) {
        job.setOutputFormatClass(TextOutputFormat.class);
    } else if (outputFormat.equalsIgnoreCase("null")) {
        job.setOutputFormatClass(NullOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, outDir);
    FileOutputFormat.setCompressOutput(job, false);

    // ---- Start job
    job.waitForCompletion(true);
}
From source file:nthu.scopelab.tsqr.ssvd.VJob.java
License:Apache License
public void start(Configuration conf, Path inputPathBt, Path inputUHatPath, Path inputSigmaPath,
        Path outputPath, int k, int numReduceTasks, int subRowSize, boolean vHalfSigma, int mis)
        throws ClassNotFoundException, InterruptedException, IOException {
    job = new Job(conf);
    job.setJobName("V-job");
    job.setJarByClass(VJob.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, inputPathBt);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    fileGather fgather = new fileGather(inputPathBt, "", fs);
    mis = Checker.checkMis(mis, fgather.getInputSize(), fs);
    FileInputFormat.setMaxInputSplitSize(job, mis * 1024 * 1024);

    FileOutputFormat.setOutputPath(job, outputPath);

    // Warn: tight hadoop integration here:
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_V);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LMatrixWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LMatrixWritable.class);

    job.setMapperClass(VMapper.class);

    job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString());
    job.getConfiguration().set(PROP_SIGMA_PATH, inputSigmaPath.toString());
    if (vHalfSigma) {
        job.getConfiguration().set(PROP_V_HALFSIGMA, "y");
    }
    job.getConfiguration().setInt(QJob.PROP_K, k);
    job.getConfiguration().setInt(SUB_ROW_SIZE, subRowSize);
    job.setNumReduceTasks(0);
    job.submit();
    //job.waitForCompletion(true);
}
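This V-job pairs setCompressOutput with SequenceFileOutputFormat.setOutputCompressionType. As a hedged aside (not part of the source above; the method name is mine), a sketch of the SequenceFile.CompressionType granularities that call accepts:

import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

// Sketch: block compression, as used above, compresses batches of keys and values
// together and typically gives the best ratio for sequence files.
static void configureBlockCompression(Job job) {
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    // Alternatives: CompressionType.NONE (records stored uncompressed) and
    // CompressionType.RECORD (each value compressed on its own).
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
}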
From source file:org.apache.jena.grande.pig.RdfStorage.java
License:Apache License
@SuppressWarnings("unchecked")
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    log.debug("setStoreLocation({}, {})", location, job);
    job.getConfiguration().set("mapred.textoutputformat.separator", "");
    FileOutputFormat.setOutputPath(job, new Path(location));
    if ("true".equals(job.getConfiguration().get("output.compression.enabled"))) {
        FileOutputFormat.setCompressOutput(job, true);
        String codec = job.getConfiguration().get("output.compression.codec");
        try {
            FileOutputFormat.setOutputCompressorClass(job,
                    (Class<? extends CompressionCodec>) Class.forName(codec));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Class not found: " + codec);
        }
    } else {
        if (location.endsWith(".bz2") || location.endsWith(".bz")) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        } else if (location.endsWith(".gz")) {
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        } else {
            FileOutputFormat.setCompressOutput(job, false);
        }
    }
}
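For completeness, a short sketch of driving the explicit-codec branch above. The two configuration keys are exactly the ones setStoreLocation reads; the method name and codec class name are just examples:

import org.apache.hadoop.mapreduce.Job;

// Sketch: opt in to the explicit-codec branch of setStoreLocation above
// instead of relying on the location's file extension.
static void requestGzipOutput(Job job) {
    job.getConfiguration().set("output.compression.enabled", "true");
    job.getConfiguration().set("output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
}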
From source file:org.apache.kylin.engine.spark.SparkFactDistinct.java
License:Apache License
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
    String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
    int samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_STATS_SAMPLING_PERCENT));

    Class[] kryoClassArray = new Class[] { Class.forName("scala.reflect.ClassTag$$anon$1"),
            Class.forName("org.apache.kylin.engine.mr.steps.SelfDefineSortableKey") };

    SparkConf conf = new SparkConf()
            .setAppName("Fact distinct columns for:" + cubeName + " segment " + segmentId);
    // serialization conf
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
    conf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClassArray);

    KylinSparkJobListener jobListener = new KylinSparkJobListener();
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        sc.sc().addSparkListener(jobListener);
        HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));

        final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
        KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

        final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);

        final Job job = Job.getInstance(sConf.get());

        final FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);

        logger.info("RDD Output path: {}", outputPath);
        logger.info("getTotalReducerNum: {}", reducerMapping.getTotalReducerNum());
        logger.info("getCuboidRowCounterReducerNum: {}", reducerMapping.getCuboidRowCounterReducerNum());
        logger.info("counter path {}", counterPath);

        boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE
                .equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

        // calculate source record bytes size
        final LongAccumulator bytesWritten = sc.sc().longAccumulator();

        final JavaRDD<String[]> recordRDD = SparkUtil.hiveRecordInputRDD(isSequenceFile, sc, inputPath, hiveTable);

        JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
                new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));

        JavaPairRDD<SelfDefineSortableKey, Iterable<Text>> aggredRDD = flatOutputRDD.groupByKey(
                new FactDistinctPartitioner(cubeName, metaUrl, sConf, reducerMapping.getTotalReducerNum()));

        JavaPairRDD<String, Tuple3<Writable, Writable, String>> outputRDD = aggredRDD
                .mapPartitionsToPair(new MultiOutputFunction(cubeName, metaUrl, sConf, samplingPercent));

        // make each reducer output to respective dir
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
                NullWritable.class, Text.class);
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
                NullWritable.class, ArrayPrimitiveWritable.class);
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
                LongWritable.class, BytesWritable.class);
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
                NullWritable.class, LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        FileOutputFormat.setCompressOutput(job, false);

        // prevent creating a zero-sized default output
        LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

        MultipleOutputsRDD multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD(outputRDD);
        multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());

        long recordCount = recordRDD.count();
        logger.info("Map input records={}", recordCount);
        logger.info("HDFS Read: {} HDFS Write", bytesWritten.value());

        Map<String, String> counterMap = Maps.newHashMap();
        counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
        counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten.value()));

        // save counter to hdfs
        HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counterMap);

        HadoopUtil.deleteHDFSMeta(metaUrl);
    }
}
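One detail worth pulling out of the Kylin job: it disables compression on the default output and relies on LazyOutputFormat so no empty part files are created. A minimal sketch of that pairing (the method name is mine, not Kylin's):

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

// Sketch: LazyOutputFormat wraps the real output format and only creates a part
// file once the first record is written, so tasks that emit nothing through the
// default output leave no zero-sized files behind.
static void configureLazySequenceFileOutput(Job job) {
    FileOutputFormat.setCompressOutput(job, false);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
}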
From source file:org.apache.kylin.source.kafka.hadoop.KafkaFlatTableJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_SEGMENT_ID);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String segmentId = getOptionValue(OPTION_SEGMENT_ID);

        // ----------------------------------------------------------------------------
        // add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentId);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        KafkaConfigManager kafkaConfigManager = KafkaConfigManager.getInstance(KylinConfig.getInstanceFromEnv());
        KafkaConfig kafkaConfig = kafkaConfigManager.getKafkaConfig(cube.getRootFactTable());
        String brokers = KafkaClient.getKafkaBrokers(kafkaConfig);
        String topic = kafkaConfig.getTopic();

        if (brokers == null || brokers.length() == 0 || topic == null) {
            throw new IllegalArgumentException("Invalid Kafka information, brokers " + brokers + ", topic " + topic);
        }

        JobEngineConfig jobEngineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
        job.getConfiguration().addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null)));
        KafkaConsumerProperties kafkaConsumerProperties = KafkaConsumerProperties.getInstanceFromEnv();
        job.getConfiguration().addResource(new Path(kafkaConsumerProperties.getKafkaConsumerHadoopJobConf()));
        appendKafkaOverrideProperties(KylinConfig.getInstanceFromEnv(), job.getConfiguration());
        job.getConfiguration().set(CONFIG_KAFKA_BROKERS, brokers);
        job.getConfiguration().set(CONFIG_KAFKA_TOPIC, topic);
        job.getConfiguration().set(CONFIG_KAFKA_TIMEOUT, String.valueOf(kafkaConfig.getTimeout()));
        job.getConfiguration().set(CONFIG_KAFKA_INPUT_FORMAT, "json");
        job.getConfiguration().set(CONFIG_KAFKA_PARSER_NAME, kafkaConfig.getParserName());
        job.getConfiguration().set(CONFIG_KAFKA_CONSUMER_GROUP, cubeName); // use cubeName as consumer group name

        setupMapper(cube.getSegmentById(segmentId));
        job.setNumReduceTasks(0);
        FileOutputFormat.setOutputPath(job, output);
        FileOutputFormat.setCompressOutput(job, true);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs location: " + output);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs compression: " + true);
        job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

        deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in KafkaFlatTableJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
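Note that this job enables compression without naming a codec, so the codec falls back to the cluster configuration (DefaultCodec unless overridden). A minimal sketch of reading back the effective settings; the method name is illustrative:

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Sketch: inspect what a configured job will actually do at write time.
static void logCompressionSettings(Job job) {
    boolean compress = FileOutputFormat.getCompressOutput(job); // true for the job above
    Class<? extends CompressionCodec> codec =
            FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class); // fallback if none set
    System.out.println("compress=" + compress + ", codec=" + codec.getName());
}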
From source file:org.apache.mahout.math.hadoop.stochasticsvd.QJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPaths, Path sbPath, Path outputPath, int aBlockRows,
        int minSplitSize, int k, int p, long seed, int numReduceTasks)
        throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);
    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_QHAT,
            org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
            SplitPartitionedWritable.class, DenseBlockWritable.class);
    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_RHAT,
            org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
            SplitPartitionedWritable.class, VectorWritable.class);

    Job job = new Job(oldApiJob);
    job.setJobName("Q-job");
    job.setJarByClass(QJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputPaths);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(SplitPartitionedWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(SplitPartitionedWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(QMapper.class);

    job.getConfiguration().setInt(PROP_AROWBLOCK_SIZE, aBlockRows);
    job.getConfiguration().setLong(PROP_OMEGA_SEED, seed);
    job.getConfiguration().setInt(PROP_K, k);
    job.getConfiguration().setInt(PROP_P, p);
    if (sbPath != null) {
        job.getConfiguration().set(PROP_SB_PATH, sbPath.toString());
    }

    /*
     * number of reduce tasks doesn't matter. we don't actually send anything to
     * reducers.
     */
    job.setNumReduceTasks(0 /* numReduceTasks */);

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("Q job unsuccessful.");
    }
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.UJob.java
License:Apache License
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath,
        int k, int numReduceTasks, Class<? extends Writable> labelClass,
        SSVDSolver.OutputScalingEnum outputScaling)
        throws ClassNotFoundException, InterruptedException, IOException {

    job = new Job(conf);
    job.setJobName("U-job");
    job.setJarByClass(UJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputPathQ);
    FileOutputFormat.setOutputPath(job, outputPath);

    // WARN: tight hadoop integration here:
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapperClass(UMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(labelClass);
    job.setOutputValueClass(VectorWritable.class);

    job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString());
    job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString());
    job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name());
    job.getConfiguration().setInt(PROP_K, k);
    job.setNumReduceTasks(0);
    job.submit();
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.VJob.java
License:Apache License
/**
 * @param conf
 * @param inputPathBt
 * @param xiPath
 *          PCA row mean (MAHOUT-817, to fix B')
 * @param sqPath
 *          sq (MAHOUT-817, to fix B')
 * @param inputUHatPath
 * @param inputSigmaPath
 * @param outputPath
 * @param k
 * @param numReduceTasks
 * @param outputScaling
 *          output scaling: apply Sigma, or Sigma^0.5, or none
 * @throws ClassNotFoundException
 * @throws InterruptedException
 * @throws IOException
 */
public void run(Configuration conf, Path inputPathBt, Path xiPath, Path sqPath, Path inputUHatPath,
        Path inputSigmaPath, Path outputPath, int k, int numReduceTasks,
        SSVDSolver.OutputScalingEnum outputScaling)
        throws ClassNotFoundException, InterruptedException, IOException {

    job = new Job(conf);
    job.setJobName("V-job");
    job.setJarByClass(VJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputPathBt);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Warn: tight hadoop integration here:
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_V);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(VMapper.class);

    job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString());
    job.getConfiguration().set(PROP_SIGMA_PATH, inputSigmaPath.toString());
    job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name());
    job.getConfiguration().setInt(PROP_K, k);
    job.setNumReduceTasks(0);

    /*
     * PCA-related options, MAHOUT-817
     */
    if (xiPath != null) {
        job.getConfiguration().set(PROP_XI_PATH, xiPath.toString());
        job.getConfiguration().set(PROP_SQ_PATH, sqPath.toString());
    }

    job.submit();
}
From source file:org.apache.mahout.text.SequenceFilesFromDirectory.java
License:Apache License
private int runMapReduce(Path input, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {

    int chunkSizeInMB = 64;
    if (hasOption(CHUNK_SIZE_OPTION[0])) {
        chunkSizeInMB = Integer.parseInt(getOption(CHUNK_SIZE_OPTION[0]));
    }

    String keyPrefix = null;
    if (hasOption(KEY_PREFIX_OPTION[0])) {
        keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
    }

    String fileFilterClassName = null;
    if (hasOption(FILE_FILTER_CLASS_OPTION[0])) {
        fileFilterClassName = getOption(FILE_FILTER_CLASS_OPTION[0]);
    }

    PathFilter pathFilter = null;
    // Prefix addition is presently handled in the Mapper and, unlike runSequential(),
    // need not be done via a pathFilter
    if (!StringUtils.isBlank(fileFilterClassName)
            && !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
        try {
            pathFilter = (PathFilter) Class.forName(fileFilterClassName).newInstance();
        } catch (InstantiationException e) {
            throw new IllegalStateException(e);
        } catch (IllegalAccessException e) {
            throw new IllegalStateException(e);
        }
    }

    // Prepare Job for submission.
    Job job = prepareJob(input, output, MultipleTextFileInputFormat.class,
            SequenceFilesFromDirectoryMapper.class, Text.class, Text.class,
            SequenceFileOutputFormat.class, "SequenceFilesFromDirectory");

    Configuration jobConfig = job.getConfiguration();
    jobConfig.set(KEY_PREFIX_OPTION[0], keyPrefix);
    jobConfig.set(FILE_FILTER_CLASS_OPTION[0], fileFilterClassName);

    FileSystem fs = FileSystem.get(jobConfig);
    FileStatus fsFileStatus = fs.getFileStatus(input);

    String inputDirList;
    if (pathFilter != null) {
        inputDirList = HadoopUtil.buildDirList(fs, fsFileStatus, pathFilter);
    } else {
        inputDirList = HadoopUtil.buildDirList(fs, fsFileStatus);
    }

    jobConfig.set(BASE_INPUT_PATH, input.toString());

    long chunkSizeInBytes = chunkSizeInMB * 1024 * 1024;

    // set the max split locations, otherwise we get nasty debug stuff
    jobConfig.set("mapreduce.job.max.split.locations", String.valueOf(MAX_JOB_SPLIT_LOCATIONS));

    FileInputFormat.setInputPaths(job, inputDirList);
    // need to set this to a multiple of the block size, or no split happens
    FileInputFormat.setMaxInputSplitSize(job, chunkSizeInBytes);
    FileOutputFormat.setCompressOutput(job, true);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : -1;
}
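This job enables compression but names no codec or compression type, so both come from the configuration. A hedged sketch of the configuration-key equivalents of the helper calls (Hadoop 2 property names; older releases used mapred.output.compress and friends):

import org.apache.hadoop.conf.Configuration;

// Sketch: the same effect as setCompressOutput / setOutputCompressorClass /
// setOutputCompressionType, expressed as raw configuration keys.
static void enableCompressionViaConf(Configuration conf) {
    conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    conf.set("mapreduce.output.fileoutputformat.compress.codec",
            "org.apache.hadoop.io.compress.DefaultCodec");
    conf.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK"); // SequenceFile outputs only
}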
From source file:org.apache.pig.builtin.PigStorage.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.getConfiguration().set(MRConfiguration.TEXTOUTPUTFORMAT_SEPARATOR, "");
    FileOutputFormat.setOutputPath(job, new Path(location));
    if ("true".equals(job.getConfiguration().get("output.compression.enabled"))) {
        FileOutputFormat.setCompressOutput(job, true);
        String codec = job.getConfiguration().get("output.compression.codec");
        try {
            FileOutputFormat.setOutputCompressorClass(job,
                    (Class<? extends CompressionCodec>) Class.forName(codec));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Class not found: " + codec);
        }
    } else {
        // This makes it so that storing to a directory ending with ".gz" or ".bz2" works.
        setCompression(new Path(location), job);
    }
}