Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usage for the org.apache.hadoop.mapred JobConf constructor, JobConf(Configuration).

Prototype

public JobConf(Configuration conf)

Document

Construct a map/reduce job configuration from an existing Configuration.
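
Before the real-world examples below, here is a minimal sketch of the constructor on its own. The class name JobConfExample and the property example.property are illustrative only, and the snippet assumes the Hadoop client libraries are on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // A plain Hadoop Configuration; by default it loads core-default.xml and core-site.xml.
        Configuration conf = new Configuration();
        conf.set("example.property", "example-value"); // illustrative key/value only

        // Wrap the Configuration in a JobConf; the settings from conf are carried over,
        // and JobConf adds the old mapred job-level accessors on top of them.
        JobConf jobConf = new JobConf(conf);
        jobConf.setJobName("jobconf-example");

        System.out.println(jobConf.get("example.property") + " / " + jobConf.getJobName());
    }
}

Most of the examples that follow use the same pattern: take the Configuration from a new-API TaskAttemptContext or JobContext and wrap it in a JobConf so that old mapred record readers, record writers, and output committers can be driven from it.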

Usage

From source file: com.facebook.hiveio.input.HiveApiInputFormat.java

License: Apache License

@Override
public RecordReaderImpl createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    JobConf jobConf = new JobConf(conf);

    HInputSplit split = (HInputSplit) inputSplit;
    split.setConf(jobConf);

    int[] columnIds = split.getColumnIds();
    HiveUtils.setReadColumnIds(jobConf, columnIds);

    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordReader<WritableComparable, Writable> baseRecordReader = split
            .getBaseRecordReader(jobConf, context);
    // CHECKSTYLE: resume LineLength

    RecordParser<Writable> recordParser = getParser(baseRecordReader.createValue(), split, columnIds, conf);

    RecordReaderImpl reader = new RecordReaderImpl(baseRecordReader, recordParser);
    reader.setObserver(observer);

    return reader;
}

From source file: com.facebook.hiveio.output.CheckOutputSpecsTest.java

License: Apache License

@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputFormat.checkOutputSpecs(jobContext);
    fail();
}

From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java

License: Apache License

@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));
    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");
    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);
    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);

    fail();
}

From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java

License: Apache License

@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(
            BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");
    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);
    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);

    outputFormat.checkOutputSpecs(jobContext);

    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}

From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java

License: Apache License

/**
 * Get the base Hadoop RecordWriter.
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext, org.apache.hadoop.mapred.OutputFormat baseOutputFormat)
        throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);
    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter = baseOutputFormat
            .getRecordWriter(null, jobConf, name, reporter);
    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}

From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java

License: Apache License

@Override
public HiveApiOutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    Configuration conf = taskAttemptContext.getConfiguration();
    JobConf jobConf = new JobConf(conf);
    OutputCommitter baseCommitter = jobConf.getOutputCommitter();
    LOG.info("Getting output committer with base output committer {}",
            baseCommitter.getClass().getSimpleName());
    return new HiveApiOutputCommitter(new HackOutputCommitter(baseCommitter, jobConf), myProfileId);
}

From source file: com.facebook.hiveio.output.HiveOutput.java

License: Apache License

/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";

    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);

    committer.setupTask(taskContext);
    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }
    recordWriter.close(taskContext);
    committer.commitTask(taskContext);

    committer.commitJob(jobContext);
}

From source file: com.facebook.hiveio.output.PerThread.java

License: Apache License

/**
 * Get task context
 *
 * @return task context
 */
public TaskAttemptContext taskContext() {
    return new HackTaskAttemptContext(new JobConf(conf), taskID);
}

From source file: com.facebook.hiveio.output.PerThread.java

License: Apache License

/**
 * Get JobConf
 *
 * @return JobConf
 */
public JobConf jobConf() {
    return new JobConf(conf);
}

From source file: com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License: Apache License

private void loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();

    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED,
                    "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            for (InputSplit inputSplit : targetSplits) {
                FileSplit split = (FileSplit) inputSplit;
                FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(session.getUser(), split.getPath());
                FileStatus file = targetFilesystem.getFileStatus(split.getPath());
                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file.getPath().toString(),
                        targetFilesystem.getFileBlockLocations(file, split.getStart(), split.getLength()),
                        split.getStart(), split.getLength(), schema, partitionKeys, false, session,
                        OptionalInt.empty(), effectivePredicate, partition.getColumnCoercions()));
                if (stopped) {
                    return;
                }
            }
        }
        return;
    }

    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName,
            inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber),
                    effectivePredicate, partition.getColumnCoercions()));
        }

        return;
    }

    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex),
                    iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }

        return;
    }

    fileIterators.addLast(iterator);
}