Example usage for org.apache.hadoop.mapreduce JobID JobID

List of usage examples for org.apache.hadoop.mapreduce JobID JobID

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce JobID constructor JobID(String jtIdentifier, int id).

Prototype

public JobID(String jtIdentifier, int id) 

Document

Constructs a JobID object from the given jtIdentifier string and numeric id.
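
A minimal, self-contained sketch of the constructor on its own is shown below. It is not taken from any of the source files under Usage; the identifier "demo" and the id 1 are arbitrary values chosen for illustration, and the JobContextImpl wrap-up mirrors how several of the snippets below hand a JobID to InputFormat.getSplits().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class JobIDExample {

    public static void main(String[] args) {
        // Build a JobID from a jobtracker identifier string and a numeric job id.
        JobID jobId = new JobID("demo", 1);

        // Both constructor arguments are recoverable through getters.
        System.out.println(jobId.getJtIdentifier()); // demo
        System.out.println(jobId.getId()); // 1

        // toString() renders the conventional job_<identifier>_<NNNN> form, e.g. job_demo_0001.
        System.out.println(jobId);

        // A JobID is usually handed to a JobContext, for example to call
        // InputFormat.getSplits(...) outside of a submitted MapReduce job.
        JobContext context = new JobContextImpl(new Configuration(), jobId);
        System.out.println(context.getJobID());
    }
}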

Usage

From source file:org.apache.tez.mapreduce.hadoop.TezTypeConverters.java

License:Apache License

public static org.apache.hadoop.mapreduce.JobID toJobID(TezDAGID id) {
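    // The application's cluster timestamp becomes the jtIdentifier and the DAG id becomes the numeric job id.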
    return new JobID(String.valueOf(id.getApplicationId().getClusterTimestamp()), id.getId());
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopElementIterator.java

License:Apache License

public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil
                .getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage)
                    .isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants
                    .getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
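        // A synthetic JobID (a random UUID as the jtIdentifier, id 1) is enough to call getSplits() outside of a running job.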
        final List<InputSplit> splits = inputFormat
                .getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split,
                    new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}

From source file:org.goldenorb.io.input.VertexInput.java

License:Apache License

/**
* 
*/
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
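        // The Orb job number serves as the jtIdentifier of a synthetic JobID with job id 0.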
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

}

From source file:org.goldenorb.io.InputSplitAllocator.java

License:Apache License

/**
 * This method gets the raw splits and calls another method to assign them.
 * @return a map of OrbPartitionMember to the list of RawSplits assigned to it
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
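    // The Orb job number again serves as the jtIdentifier of the JobID passed to the JobContext.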
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);

        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}

From source file:org.kitesdk.data.mapreduce.TestMergeOutputCommitter.java

License:Apache License

@Test
public void testSetupJobIsIdempotent() {
    DatasetKeyOutputFormat.MergeOutputCommitter<Object> outputCommitter = new DatasetKeyOutputFormat.MergeOutputCommitter<Object>();

    Configuration conf = DefaultConfiguration.get();
    DatasetKeyOutputFormat.configure(conf).appendTo(outputDataset);

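    // The identifier "jt" and id 42 are arbitrary; any JobID works for this idempotency test.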
    JobID jobId = new JobID("jt", 42);
    JobContext context = Hadoop.JobContext.ctor.newInstance(conf, jobId);

    // setup the job
    outputCommitter.setupJob(context);

    // call setup again to simulate an ApplicationMaster restart
    outputCommitter.setupJob(context);
}

From source file:parquet.hadoop.thrift.TestParquetToThriftReadProjection.java

License:Apache License

private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T expectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    //create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
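    // A synthetic JobID ("local", 1) lets getSplits() be called without submitting a job.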
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(expectedReadResult, readValue);

}

From source file:parquet.scrooge.ParquetScroogeSchemeTest.java

License:Apache License

public <T> void verifyScroogeRead(TBase recordToWrite, Class<T> readClass, String expectedStr,
        String projectionFilter) throws Exception {
    Configuration conf = new Configuration();
    conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
    conf.set(ThriftReadSupport.THRIFT_READ_CLASS_KEY, readClass.getName());
    conf.set(ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY, projectionFilter);

    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    //create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    Class writeClass = recordToWrite.getClass();
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetScroogeInputFormat<T> parquetScroogeInputFormat = new ParquetScroogeInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
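    // As above, a synthetic JobID ("local", 1) is used to drive getSplits() directly.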
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetScroogeInputFormat
            .getSplits(new JobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = new TaskAttemptContext(ContextUtil.getConfiguration(job),
                new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetScroogeInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
        }
    }
    assertEquals(expectedStr, readValue.toString());
}