List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptID constructor
@Deprecated public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java
License:Apache License
/** * Tests output//w w w .ja va 2 s . co m * * @param f * File to output to * @param num * Number of tuples to output * @throws IOException * @throws InterruptedException */ protected final void testOutput(File f, int num) throws IOException, InterruptedException { // Prepare configuration Configuration config = this.prepareConfiguration(); // Set up fake job OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat(); Job job = Job.getInstance(config); job.setOutputFormatClass(outputFormat.getClass()); this.addOutputPath(f, job.getConfiguration(), job); JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID()); Assert.assertNotNull(FileOutputFormat.getOutputPath(context)); // Output the data TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1); TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id); RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext); Iterator<T> tuples = this.generateTuples(num); while (tuples.hasNext()) { writer.write(NullWritable.get(), tuples.next()); } writer.close(taskContext); // Check output File outputFile = this.findOutputFile(this.folder.getRoot(), context); Assert.assertNotNull(outputFile); this.checkTuples(outputFile, num); }
From source file:org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java
License:Apache License
/**
 * Builds the fixture for the buffer-data tests: an emptied working directory on the raw
 * local file system, a map task attempt context, and the Mnemonic input/output settings
 * stored in {@code m_conf}.
 *
 * @throws IOException declared by the signature; a failure while preparing the working
 *         directory is rethrown as {@link IllegalStateException} instead
 */
@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    m_partfns = new ArrayList<String>();

    // Recreate the working directory from scratch.
    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    final String inputPrefix = MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX;
    final String outputPrefix = MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX;
    final long outputPoolSize = 1024L * 1024 * 1024 * 4;

    MneConfigHelper.setDir(m_conf, outputPrefix, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "buffer-data");

    // Input side.
    MneConfigHelper.setMemServiceName(m_conf, inputPrefix, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, inputPrefix, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, inputPrefix, new DurableType[] { DurableType.BUFFER });
    MneConfigHelper.setEntityFactoryProxies(m_conf, inputPrefix, new Class<?>[] {});

    // Output side.
    MneConfigHelper.setMemServiceName(m_conf, outputPrefix, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, outputPrefix, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, outputPrefix, outputPoolSize);
    MneConfigHelper.setDurableTypes(m_conf, outputPrefix, new DurableType[] { DurableType.BUFFER });
    MneConfigHelper.setEntityFactoryProxies(m_conf, outputPrefix, new Class<?>[] {});
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java
License:Apache License
@BeforeClass public void setUp() throws Exception { m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR)); m_conf = new JobConf(); m_rand = Utils.createRandom();//from w w w . java 2s .c o m unsafe = Utils.getUnsafe(); try { m_fs = FileSystem.getLocal(m_conf).getRaw(); m_fs.delete(m_workdir, true); m_fs.mkdirs(m_workdir); } catch (IOException e) { throw new IllegalStateException("bad fs init", e); } m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0); m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid); MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString()); MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data"); MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME); MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID); MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] { DurableType.CHUNK }); MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {}); MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME); MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID); MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4); MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] { DurableType.CHUNK }); MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {}); }
From source file:org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java
License:Apache License
@BeforeClass public void setUp() throws IOException { m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR)); m_conf = new JobConf(); m_rand = Utils.createRandom();// www . j a v a 2 s . c om try { m_fs = FileSystem.getLocal(m_conf).getRaw(); m_fs.delete(m_workdir, true); m_fs.mkdirs(m_workdir); } catch (IOException e) { throw new IllegalStateException("bad fs init", e); } m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0); m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid); MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString()); MneConfigHelper.setBaseOutputName(m_conf, null, "long-data"); MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME); MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID); MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] { DurableType.LONG }); MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {}); MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME); MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID); MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 2); MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] { DurableType.LONG }); MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {}); }
From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java
License:Apache License
@BeforeClass public void setUp() throws IOException { m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR)); m_conf = new JobConf(); m_rand = Utils.createRandom();//from w ww. j a va2 s .c o m try { m_fs = FileSystem.getLocal(m_conf).getRaw(); m_fs.delete(m_workdir, true); m_fs.mkdirs(m_workdir); } catch (IOException e) { throw new IllegalStateException("bad fs init", e); } m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0); m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid); MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString()); MneConfigHelper.setBaseOutputName(m_conf, null, "person-data"); MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME); MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID); MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] { DurableType.DURABLE }); MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] { PersonListEFProxy.class }); MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME); MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID); MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4); MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] { DurableType.DURABLE }); MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] { PersonListEFProxy.class }); }
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test public void testPredicatePushdown() throws Exception { TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id); final String typeStr = "struct<i:int,s:string>"; OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr); conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString()); conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000); conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true); OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>(); RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext); // write 4000 rows with the integer and the binary string TypeDescription type = TypeDescription.fromString(typeStr); OrcStruct row = (OrcStruct) OrcStruct.createValue(type); NullWritable nada = NullWritable.get(); for (int r = 0; r < 4000; ++r) { row.setFieldValue(0, new IntWritable(r)); row.setFieldValue(1, new Text(Integer.toBinaryString(r))); writer.write(nada, row);/*from ww w. j av a2s . co m*/ } writer.close(attemptContext); OrcInputFormat.setSearchArgument(conf, SearchArgumentFactory.newBuilder() .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999)).build(), new String[] { null, "i", "s" }); FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]); RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext); // the sarg should cause it to skip over the rows except 1000 to 2000 for (int r = 1000; r < 2000; ++r) { assertEquals(true, reader.nextKeyValue()); row = reader.getCurrentValue(); assertEquals(r, ((IntWritable) row.getFieldValue(0)).get()); assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString()); } assertEquals(false, reader.nextKeyValue()); }
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test public void testColumnSelection() throws Exception { String typeStr = "struct<i:int,j:int,k:int>"; OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr); conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString()); conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000); conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true); TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id); OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>(); RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext); // write 4000 rows with the integer and the binary string TypeDescription type = TypeDescription.fromString(typeStr); OrcStruct row = (OrcStruct) OrcStruct.createValue(type); NullWritable nada = NullWritable.get(); for (int r = 0; r < 3000; ++r) { row.setFieldValue(0, new IntWritable(r)); row.setFieldValue(1, new IntWritable(r * 2)); row.setFieldValue(2, new IntWritable(r * 3)); writer.write(nada, row);//from ww w . j a v a 2 s. c om } writer.close(attemptContext); conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2"); FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]); RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext); // the sarg should cause it to skip over the rows except 1000 to 2000 for (int r = 0; r < 3000; ++r) { assertEquals(true, reader.nextKeyValue()); row = reader.getCurrentValue(); assertEquals(r, ((IntWritable) row.getFieldValue(0)).get()); assertEquals(null, row.getFieldValue(1)); assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get()); } assertEquals(false, reader.nextKeyValue()); }
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/** * Make sure that the writer ignores the OrcKey * @throws Exception/*from ww w . j a va2 s . c o m*/ */ @Test public void testOrcKey() throws Exception { conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString()); String TYPE_STRING = "struct<i:int,s:string>"; OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING); conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true); TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id); TypeDescription schema = TypeDescription.fromString(TYPE_STRING); OrcKey key = new OrcKey(new OrcStruct(schema)); RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext); NullWritable nada = NullWritable.get(); for (int r = 0; r < 2000; ++r) { ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r))); writer.write(nada, key); } writer.close(attemptContext); Path path = new Path(workDir, "part-m-00000.orc"); Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf)); assertEquals(2000, file.getNumberOfRows()); assertEquals(TYPE_STRING, file.getSchema().toString()); }
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcValue: 3000 structs wrapped in an
 * {@code OrcValue} are written, and the resulting file must report 3000 rows with the
 * struct's own schema.
 *
 * @throws Exception if writing or reading the ORC file fails
 */
@Test
public void testOrcValue() throws Exception {
    final String typeString = "struct<i:int>";
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeString);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);

    TaskAttemptContext attemptContext =
            new TaskAttemptContextImpl(conf, new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1));
    OrcValue value = new OrcValue(new OrcStruct(TypeDescription.fromString(typeString)));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);

    final NullWritable noKey = NullWritable.get();
    int rowNumber = 0;
    while (rowNumber < 3000) {
        OrcStruct struct = (OrcStruct) value.value;
        struct.setAllFields(new IntWritable(rowNumber));
        writer.write(noKey, value);
        ++rowNumber;
    }
    writer.close(attemptContext);

    // Inspect the written file directly rather than through the input format.
    Reader file = OrcFile.createReader(new Path(workDir, "part-m-00000.orc"), OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(typeString, file.getSchema().toString());
}
From source file:org.apache.parquet.hadoop.thrift.TestParquetToThriftReadWriteAndProjection.java
License:Apache License
private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite, T exptectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception { final Path parquetFile = new Path("target/test/TestParquetToThriftReadWriteAndProjection/file.parquet"); final FileSystem fs = parquetFile.getFileSystem(conf); if (fs.exists(parquetFile)) { fs.delete(parquetFile, true);//from w w w.ja va 2 s .c o m } //create a test file final TProtocolFactory protocolFactory = new TCompactProtocol.Factory(); final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0); final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile, ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos)); recordToWrite.write(protocol); w.write(new BytesWritable(baos.toByteArray())); w.close(); final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>(); final Job job = new Job(conf, "read"); job.setInputFormatClass(ParquetThriftInputFormat.class); ParquetThriftInputFormat.setInputPaths(job, parquetFile); final JobID jobID = new JobID("local", 1); List<InputSplit> splits = parquetThriftInputFormat .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID)); T readValue = null; for (InputSplit split : splits) { TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext( ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0)); final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split, taskAttemptContext); reader.initialize(split, taskAttemptContext); if (reader.nextKeyValue()) { readValue = reader.getCurrentValue(); LOG.info(readValue); } } assertEquals(exptectedReadResult, readValue); }