List of usage examples for the org.apache.hadoop.mapreduce.JobID constructor JobID(String, int)
public JobID(String jtIdentifier, int id)
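The two arguments are a jobtracker/cluster identifier string and a numeric job id. A minimal sketch of constructing one directly (the identifier "local" and id 1 are arbitrary example values, not taken from the listings below):

import org.apache.hadoop.mapreduce.JobID;

public class JobIDExample {
    public static void main(String[] args) {
        // arbitrary example values for the identifier and the numeric id
        JobID jobId = new JobID("local", 1);
        // toString() renders the id as job_<jtIdentifier>_<zero-padded id>, e.g. job_local_0001
        System.out.println(jobId);
        System.out.println(jobId.getJtIdentifier()); // "local"
        System.out.println(jobId.getId());           // 1
    }
}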
From source file:org.apache.tez.mapreduce.hadoop.TezTypeConverters.java
License:Apache License
public static org.apache.hadoop.mapreduce.JobID toJobID(TezDAGID id) {
    return new JobID(String.valueOf(id.getApplicationId().getClusterTimestamp()), id.getId());
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopElementIterator.java
License:Apache License
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil
                .getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage)
                    .isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants
                    .getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat
                .getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split,
                    new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:org.goldenorb.io.input.VertexInput.java
License:Apache License
/**
 * Rebuilds the serialized input split and initializes the record reader for this partition.
 */
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.goldenorb.io.InputSplitAllocator.java
License:Apache License
/**
 * This method gets the raw splits and calls another method to assign them.
 *
 * @return a Map of OrbPartitionMember to its assigned list of RawSplits
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
From source file:org.kitesdk.data.mapreduce.TestMergeOutputCommitter.java
License:Apache License
@Test
public void testSetupJobIsIdempotent() {
    DatasetKeyOutputFormat.MergeOutputCommitter<Object> outputCommitter =
            new DatasetKeyOutputFormat.MergeOutputCommitter<Object>();
    Configuration conf = DefaultConfiguration.get();
    DatasetKeyOutputFormat.configure(conf).appendTo(outputDataset);
    JobID jobId = new JobID("jt", 42);
    JobContext context = Hadoop.JobContext.ctor.newInstance(conf, jobId);

    // setup the job
    outputCommitter.setupJob(context);

    // call setup again to simulate an ApplicationMaster restart
    outputCommitter.setupJob(context);
}
From source file:parquet.hadoop.thrift.TestParquetToThriftReadProjection.java
License:Apache License
private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T exptectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    // create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(exptectedReadResult, readValue);
}
From source file:parquet.scrooge.ParquetScroogeSchemeTest.java
License:Apache License
public <T> void verifyScroogeRead(TBase recordToWrite, Class<T> readClass, String expectedStr,
        String projectionFilter) throws Exception {
    Configuration conf = new Configuration();
    conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
    conf.set(ThriftReadSupport.THRIFT_READ_CLASS_KEY, readClass.getName());
    conf.set(ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY, projectionFilter);

    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    // create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    Class writeClass = recordToWrite.getClass();
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetScroogeInputFormat<T> parquetScroogeInputFormat = new ParquetScroogeInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetScroogeInputFormat
            .getSplits(new JobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = new TaskAttemptContext(ContextUtil.getConfiguration(job),
                new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetScroogeInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
        }
    }
    assertEquals(expectedStr, readValue.toString());
}