List of usage examples for the org.apache.hadoop.mapreduce.JobID constructor JobID(String, int)
public JobID(String jtIdentifier, int id)
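The two arguments are a jobtracker/cluster identifier string and a numeric job id. A minimal sketch of constructing one directly (the identifier "local" and id 1 are arbitrary example values, not taken from the listings below):

import org.apache.hadoop.mapreduce.JobID;

public class JobIDExample {
    public static void main(String[] args) {
        // arbitrary example values for the identifier and the numeric id
        JobID jobId = new JobID("local", 1);
        // toString() renders the id as job_<jtIdentifier>_<zero-padded id>, e.g. job_local_0001
        System.out.println(jobId);
        System.out.println(jobId.getJtIdentifier()); // "local"
        System.out.println(jobId.getId());           // 1
    }
}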
From source file:org.apache.tez.mapreduce.hadoop.TezTypeConverters.java
License:Apache License
public static org.apache.hadoop.mapreduce.JobID toJobID(TezDAGID id) {
    return new JobID(String.valueOf(id.getApplicationId().getClusterTimestamp()), id.getId());
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopElementIterator.java
License:Apache License
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil
                .getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage)
                    .isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants
                    .getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat
                .getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split,
                    new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:org.goldenorb.io.input.VertexInput.java
License:Apache License
/**
 * Rebuilds the serialized input split and initializes the record reader for this partition.
 */
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.goldenorb.io.InputSplitAllocator.java
License:Apache License
/**
 * This method gets the raw splits and calls another method to assign them.
 *
 * @return a Map of OrbPartitionMember to its assigned list of RawSplits
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
From source file:org.kitesdk.data.mapreduce.TestMergeOutputCommitter.java
License:Apache License
@Test
public void testSetupJobIsIdempotent() {
    DatasetKeyOutputFormat.MergeOutputCommitter<Object> outputCommitter =
            new DatasetKeyOutputFormat.MergeOutputCommitter<Object>();
    Configuration conf = DefaultConfiguration.get();
    DatasetKeyOutputFormat.configure(conf).appendTo(outputDataset);
    JobID jobId = new JobID("jt", 42);
    JobContext context = Hadoop.JobContext.ctor.newInstance(conf, jobId);

    // setup the job
    outputCommitter.setupJob(context);

    // call setup again to simulate an ApplicationMaster restart
    outputCommitter.setupJob(context);
}
From source file:parquet.hadoop.thrift.TestParquetToThriftReadProjection.java
License:Apache License
private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T exptectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    // create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(exptectedReadResult, readValue);
}
From source file:parquet.scrooge.ParquetScroogeSchemeTest.java
License:Apache License
public <T> void verifyScroogeRead(TBase recordToWrite, Class<T> readClass, String expectedStr,
        String projectionFilter) throws Exception {
    Configuration conf = new Configuration();
    conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
    conf.set(ThriftReadSupport.THRIFT_READ_CLASS_KEY, readClass.getName());
    conf.set(ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY, projectionFilter);

    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    // create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    Class writeClass = recordToWrite.getClass();
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetScroogeInputFormat<T> parquetScroogeInputFormat = new ParquetScroogeInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetScroogeInputFormat
            .getSplits(new JobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = new TaskAttemptContext(ContextUtil.getConfiguration(job),
                new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetScroogeInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
        }
    }
    assertEquals(expectedStr, readValue.toString());
}