List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptID constructor
@Deprecated public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
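This constructor is deprecated in favor of the TaskType-based overload: isMap == true corresponds to TaskType.MAP, false to TaskType.REDUCE. A minimal sketch of the two forms side by side (the identifier values are illustrative, taken from the examples below):

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIDExample {
    public static void main(String[] args) {
        // Deprecated boolean form, as used in most of the examples below.
        TaskAttemptID oldForm = new TaskAttemptID("jt", 0, true, 0, 0);

        // Non-deprecated equivalent: isMap == true corresponds to TaskType.MAP.
        TaskAttemptID newForm = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);

        // Both should print the same attempt string, e.g. attempt_jt_0000_m_000000_0.
        System.out.println(oldForm);
        System.out.println(newForm);
    }
}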
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License:Apache License
@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Job job = new Job(new JobConf());

    FileInputFormat.addInputPath(job, inputFile);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);
    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);

    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License:Apache License
@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Job job = new Job(new JobConf());

    FileInputFormat.addInputPath(job, inputFile1);
    FileInputFormat.addInputPath(job, inputFile2);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);
    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());
    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.nextKeyValue());
    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
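Note that the two tests above construct org.apache.hadoop.mapreduce.TaskAttemptContext directly, which only compiles against the pre-2.x API; in Hadoop 2.x TaskAttemptContext is an interface and the concrete class is TaskAttemptContextImpl (as the Lzo example further down uses). A minimal sketch of the 2.x equivalent, assuming the same conf and taskId; this is not part of the original test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hadoop 2.x replaces the deprecated boolean flag with a TaskType.
        TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
        // TaskAttemptContext is an interface in 2.x; TaskAttemptContextImpl is the concrete class.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);
        System.out.println(context.getTaskAttemptID());
    }
}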
From source file:com.google.appengine.tools.mapreduce.AppEngineMapperTest.java
License:Apache License
@Override
protected void setUp() throws Exception {
    super.setUp();
    helper.setUp();
    // datastoreService = DatastoreServiceFactory.getDatastoreService();
    mapper = new TestMapper();
    Configuration conf = new Configuration(false);
    TaskAttemptID id = new TaskAttemptID("foo", 1, true, 1, 1);
    context = mapper.new AppEngineContext(conf, id, null, null, null, null, null);
    datastoreService = DatastoreServiceFactory.getDatastoreService();
}
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();
            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
    TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(context.getConfiguration()).thenReturn(conf);
    TaskAttemptID taskId = new TaskAttemptID("200707121733", 1, false, 1, 1);
    Mockito.when(context.getTaskAttemptID()).thenReturn(taskId);
    return context;
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyOutputFormat.java
License:Apache License
@Test
public void testGetOutputCommitter() {
    try {
        TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(context.getTaskAttemptID()).thenReturn(new TaskAttemptID("200707121733", 1, false, 1, 1));
        Configuration conf = new Configuration();
        Mockito.when(context.getConfiguration()).thenReturn(conf);
        context.getConfiguration().set("mapred.output.dir", "/out");
        Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Unable to get output committer");
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License:Apache License
public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file:com.moz.fiji.mapreduce.platform.CDH5FijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In CDH4+, use all these args directly.
    return new TaskAttemptID(jtIdentifier, jobId, type, taskId, id);
}
From source file:com.project.test.parquet.TestParquetTBaseScheme.java
License:Apache License
private void createFileForRead() throws Exception {
    final Path fileToCreate = new Path(parquetInputPath + "/names.parquet");

    final Configuration conf = new Configuration();
    final FileSystem fs = fileToCreate.getFileSystem(conf);
    if (fs.exists(fileToCreate))
        fs.delete(fileToCreate, true);

    TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(fileToCreate,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, Name.class);

    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    Name n1 = new Name();
    n1.setFirst_name("Alice");
    n1.setLast_name("Practice");
    Name n2 = new Name();
    n2.setFirst_name("Bob");
    n2.setLast_name("Hope");
    Name n3 = new Name();
    n3.setFirst_name("Charlie");
    n3.setLast_name("Horse");

    n1.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    baos.reset();
    n2.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    baos.reset();
    n3.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();
}