List of usage examples for the org.apache.hadoop.mapreduce.JobID constructor
public JobID(String jtIdentifier, int id)
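Before the examples below, a minimal standalone sketch of this constructor, assuming only hadoop-mapreduce-client-core on the classpath; the "local" identifier and the id value 1 are arbitrary placeholders:

import org.apache.hadoop.mapreduce.JobID;

public class JobIDExample {
    public static void main(String[] args) {
        // jtIdentifier labels the cluster/jobtracker instance; id is the job number within it
        JobID jobId = new JobID("local", 1);
        // toString() yields the usual job_<identifier>_<id> form, e.g. job_local_0001
        System.out.println(jobId);
    }
}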
From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java
License:Apache License
public void shouldUseParquetFlagToSetCodec(String codecNameStr, CompressionCodecName expectedCodec)
        throws IOException {
    // Test mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
            new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(CodecConfig.from(task).getCodec(), expectedCodec);

    // Test mapred API
    JobConf jobConf = new JobConf();
    jobConf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}
From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java
License:Apache License
public void shouldUseHadoopFlagToSetCodec(String codecClassStr, CompressionCodecName expectedCodec)
        throws IOException {
    // Test mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", codecClassStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
            new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(expectedCodec, CodecConfig.from(task).getCodec());

    // Test mapred API
    JobConf jobConf = new JobConf();
    jobConf.setBoolean("mapred.output.compress", true);
    jobConf.set("mapred.output.compression.codec", codecClassStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}
From source file:org.apache.parquet.hadoop.thrift.TestParquetToThriftReadWriteAndProjection.java
License:Apache License
private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T exptectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadWriteAndProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    // create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(exptectedReadResult, readValue);
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    StringBuilder schemaString = new StringBuilder("a0: chararray");
    for (int i = 1; i < COLUMN_COUNT; i++) {
        schemaString.append(", a" + i + ": chararray");
    }
    String location = out;
    String schema = schemaString.toString();
    StoreFuncInterface storer = new ParquetStorer();
    Job job = new Job(conf);
    storer.setStoreFuncUDFContextSignature("sig");
    String absPath = storer.relToAbsPathForStoreLocation(location,
            new Path(new File(".").getAbsoluteFile().toURI()));
    storer.setStoreLocation(absPath, job);
    storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
    @SuppressWarnings("unchecked") // that's how the base class is defined
    OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
    // it's ContextUtil.getConfiguration(job) and not just conf !
    JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
            new JobID("jt", jobid++));
    outputFormat.checkOutputSpecs(jobContext);
    if (schema != null) {
        ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
        storer.checkSchema(resourceSchema);
        if (storer instanceof StoreMetadata) {
            ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
        }
    }
    TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
            ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
    RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
    storer.prepareToWrite(recordWriter);
    for (int i = 0; i < ROW_COUNT; i++) {
        Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
        for (int j = 0; j < COLUMN_COUNT; j++) {
            tuple.set(j, "a" + i + "_" + j);
        }
        storer.putNext(tuple);
    }
    recordWriter.close(taskAttemptContext);
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
    outputCommitter.commitTask(taskAttemptContext);
    outputCommitter.commitJob(jobContext);
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
static void load(String out, int colsToLoad, StringBuilder results) throws Exception {
    StringBuilder schemaString = new StringBuilder("a0: chararray");
    for (int i = 1; i < colsToLoad; i++) {
        schemaString.append(", a" + i + ": chararray");
    }
    long t0 = System.currentTimeMillis();
    Job job = new Job(conf);
    int loadjobId = jobid++;
    LoadFunc loadFunc = new ParquetLoader(schemaString.toString());
    loadFunc.setUDFContextSignature("sigLoader" + loadjobId);
    String absPath = loadFunc.relativeToAbsolutePath(out, new Path(new File(".").getAbsoluteFile().toURI()));
    loadFunc.setLocation(absPath, job);
    @SuppressWarnings("unchecked") // that's how the base class is defined
    InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
    JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
            new JobID("jt", loadjobId));
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    int i = 0;
    int taskid = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", loadjobId, true, taskid++, 0));
        RecordReader<Void, Tuple> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        loadFunc.prepareToRead(recordReader, null);
        recordReader.initialize(split, taskAttemptContext);
        Tuple t;
        while ((t = loadFunc.getNext()) != null) {
            if (Log.DEBUG)
                System.out.println(t);
            ++i;
        }
    }
    assertEquals(ROW_COUNT, i);
    long t1 = System.currentTimeMillis();
    results.append((t1 - t0) + " ms to read " + colsToLoad + " columns\n");
}
From source file:org.apache.tajo.storage.hbase.HBaseStorageManager.java
License:Apache License
@Override
public Path commitOutputData(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }

    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseStorageManager.getHBaseConfiguration(queryContext.getConf(),
            tableDesc.getMeta());
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    if (tableDesc.getName() == null && tableDesc.getPath() != null) {
        // insert into location
        return super.commitOutputData(queryContext, finalEbId, plan, schema, tableDesc, false);
    } else {
        // insert into table
        String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);
        HTable htable = new HTable(hbaseConf, tableName);
        try {
            LoadIncrementalHFiles loadIncrementalHFiles = null;
            try {
                loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
            } catch (Exception e) {
                LOG.error(e.getMessage(), e);
                throw new IOException(e.getMessage(), e);
            }
            loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);
            return stagingResultDir;
        } finally {
            htable.close();
        }
    }
}
From source file:org.apache.tajo.storage.hbase.HBaseTablespace.java
License:Apache License
@Override
public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }

    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseConfiguration.create(this.hbaseConf);
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    // insert into table
    String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);
    HTable htable = new HTable(hbaseConf, tableName);
    try {
        LoadIncrementalHFiles loadIncrementalHFiles = null;
        try {
            loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            throw new IOException(e.getMessage(), e);
        }
        loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);
        return stagingResultDir;
    } finally {
        htable.close();
    }
}
From source file:org.apache.tez.auxservices.ShuffleHandler.java
License:Apache License
@Override
public void initializeApplication(ApplicationInitializationContext context) {
    String user = context.getUser();
    ApplicationId appId = context.getApplicationId();
    ByteBuffer secret = context.getApplicationDataForService();
    // TODO these bytes should be versioned
    try {
        Token<JobTokenIdentifier> jt = deserializeServiceData(secret);
        // TODO: Once SHuffle is out of NM, this can use MR APIs
        JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
        recordJobShuffleInfo(jobId, user, jt);
    } catch (IOException e) {
        LOG.error("Error during initApp", e);
        // TODO add API to AuxiliaryServices to report failures
    }
}
From source file:org.apache.tez.auxservices.ShuffleHandler.java
License:Apache License
@Override
public void stopApplication(ApplicationTerminationContext context) {
    ApplicationId appId = context.getApplicationId();
    JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
    try {
        removeJobShuffleInfo(jobId);
    } catch (IOException e) {
        LOG.error("Error during stopApp", e);
        // TODO add API to AuxiliaryServices to report failures
    }
}
From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License:Apache License
private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf,
            inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }
    Assert.assertEquals(locationHints, actual);
}