Usage examples for org.apache.hadoop.mapred.FileOutputFormat.getUniqueName
public static String getUniqueName(JobConf conf, String name)
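getUniqueName builds a per-task file name by appending the task type ("m" for map, "r" for reduce) and the zero-padded task partition to the prefix you pass in; called outside a running task it throws IllegalArgumentException. Below is a minimal sketch of the naming scheme that sets the task properties by hand, assuming the Hadoop 2.x property names (the framework normally sets these for you inside a task):

import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class GetUniqueNameDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Normally set by the MR framework inside a task; set here only to
        // demonstrate the naming scheme. Hadoop 2.x property names assumed.
        conf.setInt("mapreduce.task.partition", 3);
        conf.setBoolean("mapreduce.task.ismap", false); // pretend we are reducer 3

        // prefix + "-" + task type + "-" + zero-padded partition
        System.out.println(FileOutputFormat.getUniqueName(conf, "part"));
        // prints: part-r-00003

        // With no task partition set, the call fails:
        // FileOutputFormat.getUniqueName(new JobConf(), "part")
        //   -> IllegalArgumentException("This method can only be called from within a Job")
    }
}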
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License
/**
 * Get the base Hadoop RecordWriter.
 *
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext,
        org.apache.hadoop.mapred.OutputFormat baseOutputFormat) throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);
    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            baseOutputFormat.getRecordWriter(null, jobConf, name, reporter);
    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}
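Two levels of uniqueness are at work here: the CREATED_FILES_COUNTER AtomicInteger makes the prefix ("part-1", "part-2", ...) unique among the writers created by this task attempt, and getUniqueName then appends the task-type/partition suffix that keeps names unique across tasks, yielding files such as part-1-m-00002. Note also that the base format's getRecordWriter is called with a null FileSystem argument; file-based mapred OutputFormats typically ignore that parameter (it is literally named "ignored" in TextOutputFormat).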
From source file: org.apache.hcatalog.mapreduce.FileOutputFormatContainer.java
License: Apache License
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // This needs to be set manually; under normal circumstances the MR Task does this.
    setWorkOutputPath(context);

    // Configure the output key and value classes.
    // This is required for writing null as key for file based tables.
    context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
    Class<? extends SerDe> serde = storageHandler.getSerDeClass();
    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());

    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()) {
        // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used, so null is fine.
        // (Records can't be written until the values of the dynamic partitions are deduced.
        // By that time, a new local instance of RecordWriter, with the correct output path, will be constructed.)
        rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
    } else {
        Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir,
                FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"));
        rw = new FileRecordWriterContainer(
                getBaseOutputFormat().getRecordWriter(
                        parentDir.getFileSystem(context.getConfiguration()),
                        new JobConf(context.getConfiguration()),
                        childPath.toString(),
                        InternalUtil.createReporter(context)),
                context);
    }
    return rw;
}
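In the static-partition branch, getUniqueName supplies the leaf file name ("part-m-00000", "part-r-00001", ...) under the task's work output directory (mapred.work.output.dir). The dynamic branch defers writer creation entirely, since the output path can only be computed once the dynamic partition values are known from the records themselves.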
From source file: org.apache.hive.hcatalog.mapreduce.FileOutputFormatContainer.java
License: Apache License
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // This needs to be set manually; under normal circumstances the MR Task does this.
    setWorkOutputPath(context);

    // Configure the output key and value classes.
    // This is required for writing null as key for file based tables.
    context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
    Class<? extends SerDe> serde = storageHandler.getSerDeClass();
    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());

    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
        // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used, so null is fine.
        // (Records can't be written until the values of the dynamic partitions are deduced.
        // By that time, a new local instance of RecordWriter, with the correct output path, will be constructed.)
        rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
    } else {
        Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir,
                FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
                        context.getConfiguration().get("mapreduce.output.basename", "part")));
        rw = new StaticPartitionFileRecordWriterContainer(
                getBaseOutputFormat().getRecordWriter(
                        parentDir.getFileSystem(context.getConfiguration()),
                        new JobConf(context.getConfiguration()),
                        childPath.toString(),
                        InternalUtil.createReporter(context)),
                context);
    }
    return rw;
}
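This later org.apache.hive.hcatalog version differs from the org.apache.hcatalog one above in two ways: it honors the standard mapreduce.output.basename property (falling back to "part") as the prefix passed to getUniqueName instead of hard-coding it, and it splits the writer container into DynamicPartitionFileRecordWriterContainer and StaticPartitionFileRecordWriterContainer rather than using a single FileRecordWriterContainer for both cases.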