List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptID constructor
@Deprecated public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
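This overload is deprecated: the boolean isMap flag has been superseded by the TaskType-based constructor. A minimal sketch (class name and identifiers are illustrative, not from the examples below) contrasting the two forms:

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIDExample {
    public static void main(String[] args) {
        // Deprecated boolean form: true means a map task, false a reduce task.
        TaskAttemptID deprecatedId = new TaskAttemptID("jt", 1, true, 0, 0);

        // Preferred replacement: the TaskType-based overload.
        TaskAttemptID mapAttempt = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);

        // Both print the canonical attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<id> form,
        // e.g. attempt_jt_0001_m_000000_0.
        System.out.println(deprecatedId);
        System.out.println(mapAttempt);
    }
}

The examples below use both forms; the HadoopShims helpers show the usual bridge from the boolean flag to TaskType.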
From source file:org.apache.parquet.hadoop.thrift.TestThriftToParquetFileWriter.java
License:Apache License
private <T extends TBase<?, ?>> Path createFile(T... tObjs) throws IOException, InterruptedException, TException {
    final Path fileToCreate = new Path(
            "target/test/TestThriftToParquetFileWriter/" + tObjs[0].getClass() + ".parquet");
    LOG.info("File created: " + fileToCreate.toString());
    Configuration conf = new Configuration();
    final FileSystem fs = fileToCreate.getFileSystem(conf);
    if (fs.exists(fileToCreate)) {
        fs.delete(fileToCreate, true);
    }
    TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(fileToCreate,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory,
            (Class<? extends TBase<?, ?>>) tObjs[0].getClass());
    for (T tObj : tObjs) {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
        tObj.write(protocol);
        w.write(new BytesWritable(baos.toByteArray()));
    }
    w.close();
    return fileToCreate;
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    {
        StringBuilder schemaString = new StringBuilder("a0: chararray");
        for (int i = 1; i < COLUMN_COUNT; i++) {
            schemaString.append(", a" + i + ": chararray");
        }
        String location = out;
        String schema = schemaString.toString();
        StoreFuncInterface storer = new ParquetStorer();
        Job job = new Job(conf);
        storer.setStoreFuncUDFContextSignature("sig");
        String absPath = storer.relToAbsPathForStoreLocation(location,
                new Path(new File(".").getAbsoluteFile().toURI()));
        storer.setStoreLocation(absPath, job);
        storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
        @SuppressWarnings("unchecked") // that's how the base class is defined
        OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
        // it's ContextUtil.getConfiguration(job) and not just conf !
        JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
                new JobID("jt", jobid++));
        outputFormat.checkOutputSpecs(jobContext);
        if (schema != null) {
            ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
            storer.checkSchema(resourceSchema);
            if (storer instanceof StoreMetadata) {
                ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
            }
        }
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
        RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        storer.prepareToWrite(recordWriter);
        for (int i = 0; i < ROW_COUNT; i++) {
            Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
            for (int j = 0; j < COLUMN_COUNT; j++) {
                tuple.set(j, "a" + i + "_" + j);
            }
            storer.putNext(tuple);
        }
        recordWriter.close(taskAttemptContext);
        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
        outputCommitter.commitTask(taskAttemptContext);
        outputCommitter.commitJob(jobContext);
    }
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
static void load(String out, int colsToLoad, StringBuilder results) throws Exception {
    StringBuilder schemaString = new StringBuilder("a0: chararray");
    for (int i = 1; i < colsToLoad; i++) {
        schemaString.append(", a" + i + ": chararray");
    }
    long t0 = System.currentTimeMillis();
    Job job = new Job(conf);
    int loadjobId = jobid++;
    LoadFunc loadFunc = new ParquetLoader(schemaString.toString());
    loadFunc.setUDFContextSignature("sigLoader" + loadjobId);
    String absPath = loadFunc.relativeToAbsolutePath(out, new Path(new File(".").getAbsoluteFile().toURI()));
    loadFunc.setLocation(absPath, job);
    @SuppressWarnings("unchecked") // that's how the base class is defined
    InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
    JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
            new JobID("jt", loadjobId));
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    int i = 0;
    int taskid = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", loadjobId, true, taskid++, 0));
        RecordReader<Void, Tuple> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        loadFunc.prepareToRead(recordReader, null);
        recordReader.initialize(split, taskAttemptContext);
        Tuple t;
        while ((t = loadFunc.getNext()) != null) {
            if (Log.DEBUG)
                System.out.println(t);
            ++i;
        }
    }
    assertEquals(ROW_COUNT, i);
    long t1 = System.currentTimeMillis();
    results.append((t1 - t0) + " ms to read " + colsToLoad + " columns\n");
}
From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java
License:Apache License
static public TaskAttemptID getNewTaskAttemptID() {
    TaskAttemptID taskAttemptID = new TaskAttemptID("", 1, TaskType.MAP, 1, 1);
    return taskAttemptID;
}
From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java
License:Apache License
static public TaskAttemptID createTaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId,
        int id) {
    if (isMap) {
        return new TaskAttemptID(jtIdentifier, jobId, TaskType.MAP, taskId, id);
    } else {
        return new TaskAttemptID(jtIdentifier, jobId, TaskType.REDUCE, taskId, id);
    }
}
From source file:org.apache.pig.piggybank.squeal.backend.storm.io.StormPOStoreImpl.java
License:Apache License
public StormPOStoreImpl(String stormId, int partitionIndex, AtomicInteger sign) {
    this.partitionIndex = partitionIndex;
    this.sign = sign;

    // "storm.id" "PigStorm-3-0-1-1363457130" PigStorm-3-0-1-1363536122
    // TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
    TaskAttemptID attemptID = new TaskAttemptID(stormId, (int) (System.currentTimeMillis() / 1000), true,
            partitionIndex, sign.get());

    // Create a fake TaskContext for this stuff.
    Configuration outputConf = new Configuration();
    this.context = HadoopShims.createTaskAttemptContext(outputConf, attemptID);
}
From source file:org.apache.tajo.storage.hbase.HFileAppender.java
License:Apache License
@Override
public void init() throws IOException {
    super.init();

    Configuration taskConf = new Configuration();
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());

    ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
    writerContext = new TaskAttemptContextImpl(taskConf, new TaskAttemptID(ebId.getQueryId().toString(),
            ebId.getId(), TaskType.MAP, taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));

    HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
    try {
        writer = hFileOutputFormat2.getRecordWriter(writerContext);

        committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
        workingFilePath = committer.getWorkPath();
    } catch (InterruptedException e) {
        throw new IOException(e.getMessage(), e);
    }

    LOG.info("Created hbase file writer: " + workingFilePath);
}
From source file:org.apache.tez.mapreduce.committer.MROutputCommitter.java
License:Apache License
@SuppressWarnings("rawtypes") private org.apache.hadoop.mapreduce.OutputCommitter getOutputCommitter(OutputCommitterContext context) { org.apache.hadoop.mapreduce.OutputCommitter committer = null; newApiCommitter = false;/*w w w . j a v a2 s. c o m*/ if (jobConf.getBoolean("mapred.reducer.new-api", false) || jobConf.getBoolean("mapred.mapper.new-api", false)) { newApiCommitter = true; LOG.info("Using mapred newApiCommitter."); } if (newApiCommitter) { TaskAttemptID taskAttemptID = new TaskAttemptID( Long.toString(context.getApplicationId().getClusterTimestamp()), context.getApplicationId().getId(), ((jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ? TaskType.MAP : TaskType.REDUCE)), 0, context.getDAGAttemptNumber()); TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskAttemptID); try { OutputFormat outputFormat = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), jobConf); committer = outputFormat.getOutputCommitter(taskContext); } catch (Exception e) { throw new TezUncheckedException(e); } } else { committer = ReflectionUtils.newInstance(jobConf.getClass("mapred.output.committer.class", FileOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class), jobConf); } LOG.info("OutputCommitter for outputName=" + context.getOutputName() + ", vertexName=" + context.getVertexName() + ", outputCommitterClass=" + committer.getClass().getName()); return committer; }
From source file:org.apache.tez.mapreduce.committer.MROutputCommitter.java
License:Apache License
@Override
public void recoverTask(int taskIndex, int attemptId) throws IOException {
    if (!initialized) {
        throw new RuntimeException("Committer not initialized");
    }
    TaskAttemptID taskAttemptID = new TaskAttemptID(
            Long.toString(getContext().getApplicationId().getClusterTimestamp())
                    + String.valueOf(getContext().getVertexIndex()),
            getContext().getApplicationId().getId(),
            ((jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ? TaskType.MAP : TaskType.REDUCE)),
            taskIndex, attemptId);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskAttemptID);
    committer.recoverTask(taskContext);
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.RecordReaderWriterTest.java
License:Apache License
private static void validateFileSplits(final List<FileSplit> fileSplits, final Configuration configuration,
        final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass,
        final Optional<Class<? extends OutputFormat<NullWritable, VertexWritable>>> outFormatClass)
        throws Exception {

    final InputFormat inputFormat = ReflectionUtils.newInstance(inputFormatClass, configuration);
    final TaskAttemptContext job = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID(UUID.randomUUID().toString(), 0, TaskType.MAP, 0, 0));

    int vertexCount = 0;
    int outEdgeCount = 0;
    int inEdgeCount = 0;

    final OutputFormat<NullWritable, VertexWritable> outputFormat = outFormatClass.isPresent()
            ? ReflectionUtils.newInstance(outFormatClass.get(), configuration)
            : null;
    final RecordWriter<NullWritable, VertexWritable> writer = null == outputFormat ? null
            : outputFormat.getRecordWriter(job);

    boolean foundKeyValue = false;
    for (final FileSplit split : fileSplits) {
        logger.info("\treading file split {}", split.getPath().getName() + " ({}",
                split.getStart() + "..." + (split.getStart() + split.getLength()), "{} {} bytes)");
        final RecordReader reader = inputFormat.createRecordReader(split, job);
        float lastProgress = -1f;
        while (reader.nextKeyValue()) {
            //System.out.println("" + reader.getProgress() + "> " + reader.getCurrentKey() + ": " + reader.getCurrentValue());
            final float progress = reader.getProgress();
            assertTrue(progress >= lastProgress);
            assertEquals(NullWritable.class, reader.getCurrentKey().getClass());
            final VertexWritable vertexWritable = (VertexWritable) reader.getCurrentValue();
            if (null != writer)
                writer.write(NullWritable.get(), vertexWritable);
            vertexCount++;
            outEdgeCount = outEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.OUT));
            inEdgeCount = inEdgeCount + (int) IteratorUtils.count(vertexWritable.get().edges(Direction.IN));
            final Vertex vertex = vertexWritable.get();
            assertEquals(Integer.class, vertex.id().getClass());
            if (vertex.value("name").equals("SUGAR MAGNOLIA")) {
                foundKeyValue = true;
                assertEquals(92, IteratorUtils.count(vertex.edges(Direction.OUT)));
                assertEquals(77, IteratorUtils.count(vertex.edges(Direction.IN)));
            }
            lastProgress = progress;
        }
    }
    assertEquals(8049, outEdgeCount);
    assertEquals(8049, inEdgeCount);
    assertEquals(outEdgeCount, inEdgeCount);
    assertEquals(808, vertexCount);
    assertTrue(foundKeyValue);

    if (null != writer) {
        writer.close(new TaskAttemptContextImpl(configuration, job.getTaskAttemptID()));
        for (int i = 1; i < 10; i++) {
            final File outputDirectory = new File(
                    new URL(configuration.get("mapreduce.output.fileoutputformat.outputdir")).toURI());
            final List<FileSplit> splits = generateFileSplits(
                    new File(outputDirectory.getAbsoluteFile() + "/_temporary/0/_temporary/"
                            + job.getTaskAttemptID().getTaskID().toString().replace("task", "attempt") + "_0"
                            + "/part-m-00000"),
                    i);
            validateFileSplits(splits, configuration, inputFormatClass, Optional.empty());
        }
    }
}
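Several of the examples above lean on the canonical attempt_* string form, for instance when the TinkerPop test rewrites a task directory name into an attempt directory name. A minimal round-trip sketch (identifiers are illustrative) showing how a TaskAttemptID converts to and from that string via the standard forName parser:

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIDRoundTrip {
    public static void main(String[] args) {
        // Build an id, print its canonical string form, and parse it back.
        TaskAttemptID attempt = new TaskAttemptID("jt", 7, TaskType.REDUCE, 3, 1);
        String s = attempt.toString();              // e.g. attempt_jt_0007_r_000003_1
        TaskAttemptID parsed = TaskAttemptID.forName(s);
        System.out.println(parsed.getJobID());      // e.g. job_jt_0007
        System.out.println(parsed.equals(attempt)); // true
    }
}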