List of usage examples for org.apache.hadoop.mapred.JobConf (constructor)
public JobConf(Configuration conf)
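Every example below follows the same pattern: an existing Configuration (or HiveConf) is wrapped in a JobConf so that the mapred-era APIs (input/output formats, committers) can consume it. A minimal, self-contained sketch of that copying constructor; the property key is illustrative and not taken from any example:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("example.key", "example-value"); // illustrative property

        // Copying constructor: jobConf starts with every entry of conf,
        // and later mutations of jobConf do not write back into conf.
        JobConf jobConf = new JobConf(conf);
        System.out.println(jobConf.get("example.key")); // prints "example-value"
    }
}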
From source file:com.facebook.hiveio.input.HiveApiInputFormat.java
License:Apache License
@Override
public RecordReaderImpl createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    JobConf jobConf = new JobConf(conf);

    HInputSplit split = (HInputSplit) inputSplit;
    split.setConf(jobConf);

    int[] columnIds = split.getColumnIds();
    HiveUtils.setReadColumnIds(jobConf, columnIds);

    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordReader<WritableComparable, Writable> baseRecordReader =
            split.getBaseRecordReader(jobConf, context);
    // CHECKSTYLE: resume LineLength

    RecordParser<Writable> recordParser = getParser(baseRecordReader.createValue(), split, columnIds, conf);

    RecordReaderImpl reader = new RecordReaderImpl(baseRecordReader, recordParser);
    reader.setObserver(observer);
    return reader;
}
From source file:com.facebook.hiveio.output.CheckOutputSpecsTest.java
License:Apache License
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file:com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License:Apache License
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file:com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License:Apache License
@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    FaultyThriftHiveMetastore metastore =
            new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);
    outputFormat.checkOutputSpecs(jobContext);
    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}
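All three tests above build a JobContext the same way; a hedged consolidation of that shared setup, assuming the same imports as the tests (the helper name is ours, not from the source files):

// Hypothetical helper capturing the context setup repeated in the tests above.
private static JobContext makeJobContext(Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    return new HackJobContext(jobConf, taskContext.getJobID());
}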
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
/**
 * Get the base Hadoop RecordWriter.
 *
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext,
        org.apache.hadoop.mapred.OutputFormat baseOutputFormat) throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);

    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            baseOutputFormat.getRecordWriter(null, jobConf, name, reporter);

    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
@Override
public HiveApiOutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    Configuration conf = taskAttemptContext.getConfiguration();
    JobConf jobConf = new JobConf(conf);
    OutputCommitter baseCommitter = jobConf.getOutputCommitter();
    LOG.info("Getting output committer with base output committer {}",
            baseCommitter.getClass().getSimpleName());

    return new HiveApiOutputCommitter(new HackOutputCommitter(baseCommitter, jobConf), myProfileId);
}
From source file:com.facebook.hiveio.output.HiveOutput.java
License:Apache License
/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";
    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);

    committer.setupTask(taskContext);
    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }
    recordWriter.close(taskContext);
    committer.commitTask(taskContext);

    committer.commitJob(jobContext);
}
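A hedged usage sketch for writeTable; the table name and the record-building helper are hypothetical, since this listing does not show how HiveWritableRecord instances are created:

HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.getTableDesc().setTableName("my_table"); // hypothetical table name

// buildRecords() is a hypothetical helper; record construction is not shown here.
Iterable<HiveWritableRecord> records = buildRecords();
HiveOutput.writeTable(outputDesc, records);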
From source file:com.facebook.hiveio.output.PerThread.java
License:Apache License
/**
 * Get task context
 *
 * @return task context
 */
public TaskAttemptContext taskContext() {
    return new HackTaskAttemptContext(new JobConf(conf), taskID);
}
From source file:com.facebook.hiveio.output.PerThread.java
License:Apache License
/**
 * Get JobConf
 *
 * @return JobConf
 */
public JobConf jobConf() {
    return new JobConf(conf);
}
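Both PerThread accessors hand out a fresh copy rather than the shared conf; a small sketch of why that matters for thread isolation (the key is illustrative):

JobConf a = new JobConf(conf);
JobConf b = new JobConf(conf);
a.set("example.key", "thread-a-value"); // illustrative key
// b.get("example.key") still reflects conf: each JobConf copies conf at
// construction time, so per-thread mutation does not leak across threads.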
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
private void loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();

    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED,
                    "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            for (InputSplit inputSplit : targetSplits) {
                FileSplit split = (FileSplit) inputSplit;
                FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(session.getUser(), split.getPath());
                FileStatus file = targetFilesystem.getFileStatus(split.getPath());
                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file.getPath().toString(),
                        targetFilesystem.getFileBlockLocations(file, split.getStart(), split.getLength()),
                        split.getStart(), split.getLength(), schema, partitionKeys, false, session,
                        OptionalInt.empty(), effectivePredicate, partition.getColumnCoercions()));
                if (stopped) {
                    return;
                }
            }
        }
        return;
    }

    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName,
            inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber),
                    effectivePredicate, partition.getColumnCoercions()));
        }
        return;
    }

    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex),
                    iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }
        return;
    }

    fileIterators.addLast(iterator);
}
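The symlink branch above also doubles as the general old-API split-computation recipe: build a JobConf, register the input format and paths, then ask the format for splits. A stripped-down, self-contained sketch, with an illustrative input path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(new Configuration());
        job.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("/tmp/example-input")); // illustrative path

        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(job); // the format reads its settings from the JobConf
        InputSplit[] splits = inputFormat.getSplits(job, 0); // 0 = no split-count hint, as above
        System.out.println("splits: " + splits.length);
    }
}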