Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
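The examples below mock or consume this method from InputFormat, OutputFormat, and RecordReader implementations. As a minimal orientation before the real-world samples, here is a sketch of the most common pattern: a custom RecordReader pulling job settings from the task context during initialize(). The class and the property name "example.buffer.size" are hypothetical placeholders used only for illustration, not part of any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: reading job configuration inside a custom RecordReader.
// "example.buffer.size" is a made-up property key, shown only to illustrate the call.
public class ExampleRecordReader extends RecordReader<LongWritable, Text> {

    private int bufferSize;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The task context exposes the job Configuration for this attempt.
        Configuration conf = context.getConfiguration();
        bufferSize = conf.getInt("example.buffer.size", 64 * 1024);
    }

    @Override
    public boolean nextKeyValue() { return false; }

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 1.0f; }

    @Override
    public void close() { }
}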
From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyOutputFormat.java
License: Apache License

@Test
public void testGetOutputCommitter() {
    try {
        TaskAttemptContext context = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(context.getTaskAttemptID())
                .thenReturn(new TaskAttemptID("200707121733", 1, false, 1, 1));
        Configuration conf = new Configuration();
        Mockito.when(context.getConfiguration()).thenReturn(conf);
        context.getConfiguration().set("mapred.output.dir", "/out");
        Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Unable to get output committer");
    }
}
From source file: com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License: Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(
            cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader =
                uniformSizeInputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus[] fileStatus = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file: com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java
License: Apache License

private TaskAttemptContext getTaskAttemptContext(Configuration config, TaskAttemptID taskId) {
    TaskAttemptContext localContext = Mockito.mock(TaskAttemptContext.class);
    Mockito.when(localContext.getConfiguration()).thenReturn(config);
    Mockito.when(localContext.getTaskAttemptID()).thenReturn(taskId);
    return localContext;
}
From source file: com.intel.genomicsdb.GenomicsDBInputFormat.java
License: Open Source License

public RecordReader<String, VCONTEXT> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {

    String loaderJson;
    String queryJson;

    GenomicsDBFeatureReader<VCONTEXT, SOURCE> featureReader;
    GenomicsDBRecordReader<VCONTEXT, SOURCE> recordReader;

    if (taskAttemptContext != null) {
        Configuration configuration = taskAttemptContext.getConfiguration();
        loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON);
        queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON);
    } else {
        // If control reaches here, this method was called from GenomicsDBRDD.
        // In that case the configuration object must have been set through setConf(),
        // otherwise this leads to a NullPointerException.
        assert (configuration != null);
        loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON);
        queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON);
    }

    featureReader = new GenomicsDBFeatureReader<VCONTEXT, SOURCE>(loaderJson, queryJson,
            (FeatureCodec<VCONTEXT, SOURCE>) new BCF2Codec());
    recordReader = new GenomicsDBRecordReader<VCONTEXT, SOURCE>(featureReader);
    return recordReader;
}
From source file: com.jbw.tar.sf.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = ".tar";
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);
    // Wrap the raw output stream in a writer that emits tar-formatted records.
    return new TarOutputWriter<>(fileOut);
}
From source file: com.jbw.taroutputformat.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    String extension = ".tar";
    Path file = getDefaultWorkFile(tac, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);
    return new TarOutputWriter<>(fileOut);
}
From source file: com.jumptap.h2redis.RedisOutputFormat.java
License: Open Source License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RedisHMRecordWriter(context.getConfiguration());
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License

/**
 * Performs all the necessary actions to initialize and prepare this record reader.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    this.ctx = context;
    conf = context.getConfiguration();
    keysRead = 0;
    components = Sets.newHashSetWithExpectedSize(3);
    FileSplit split = (FileSplit) inputSplit;
    validateConfiguration(conf);

    // Get comparator. Subcomparator can be null.
    AbstractType<?> comparator = getConfComparator(conf);
    AbstractType<?> subcomparator = getConfSubComparator(conf);

    // Get partitioner for keys
    IPartitioner partitioner = getConfPartitioner(conf);

    // Move minimum required db tables to local disk.
    Path dataTablePath = split.getPath();
    FileSystem remoteFS = FileSystem.get(dataTablePath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    CFMetaData cfMetaData;
    if (getConfIsSparse(conf)) {
        cfMetaData = CFMetaData.sparseCFMetaData(getDescriptor().ksname, getDescriptor().cfname,
                comparator);
    } else {
        cfMetaData = CFMetaData.denseCFMetaData(getDescriptor().ksname, getDescriptor().cfname,
                comparator, subcomparator);
    }

    // Open table and get scanner
    SSTableReader tableReader = openSSTableReader(partitioner, cfMetaData);
    setTableScanner(tableReader);
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License

/**
 * Moves all the minimum required tables for the table reader to work to local disk.
 *
 * @param dataTablePath The data table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath,
        TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();

    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(),
            localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);

    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }
    components.add(Component.DATA);

    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);
    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }

    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License

/**
 * Decompresses input files that were snappy compressed before opening them with the sstable
 * reader. It writes a new decompressed file with the same name as the compressed one. The old
 * one gets deleted.
 */
private void decompress(Path localTablePath, TaskAttemptContext context) throws IOException {
    context.setStatus(String.format("Decompressing %s", localTablePath.toUri()));
    int compressionBufSize = context.getConfiguration().getInt(PropertyConstants.DECOMPRESS_BUFFER.txt,
            DEFAULT_DECOMPRESS_BUFFER_SIZE);
    compressionBufSize *= 1024;
    LOG.info("Decompressing {} with buffer size {}.", localTablePath, compressionBufSize);
    File compressedFile = new File(localTablePath.toString());
    InputStream fis = new FileInputStream(compressedFile);
    InputStream bis = new BufferedInputStream(fis, compressionBufSize);
    InputStream sip = new SnappyInputStream(bis);
    File decompressedFile = new File(localTablePath.toString() + ".tmp");
    OutputStream os = new FileOutputStream(decompressedFile);
    OutputStream bos = new BufferedOutputStream(os, compressionBufSize);

    byte[] inByteArr = new byte[compressionBufSize];
    int bytesRead = 0;
    int bytesSinceLastReport = 0;
    while ((bytesRead = sip.read(inByteArr)) > 0) {
        bos.write(inByteArr, 0, bytesRead);
        bytesSinceLastReport += bytesRead;
        // Avoid timeouts. Report progress to the jobtracker once per reporting interval.
        if (bytesSinceLastReport >= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS) {
            context.setStatus(String.format("Decompressed %d bytes.", bytesSinceLastReport));
            bytesSinceLastReport -= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS;
        }
    }
    sip.close();
    bos.close();
    compressedFile.delete();
    decompressedFile.renameTo(compressedFile);
}