List of usage examples for the org.apache.hadoop.fs.LocalDirAllocator constructor LocalDirAllocator(String)
public LocalDirAllocator(String contextCfgItemName)
From source file:org.apache.tez.runtime.library.common.shuffle.impl.Shuffle.java
License:Apache License
public Shuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException { this.inputContext = inputContext; this.conf = conf; this.metrics = new ShuffleClientMetrics(inputContext.getDAGName(), inputContext.getTaskVertexName(), inputContext.getTaskIndex(), this.conf, UserGroupInformation.getCurrentUser().getShortUserName()); this.numInputs = numInputs; this.jobTokenSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes( inputContext.getServiceConsumerMetaData(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID)); if (ConfigUtils.isIntermediateInputCompressed(conf)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, conf); } else {// w w w . j a va 2 s . c o m codec = null; } this.ifileReadAhead = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); if (this.ifileReadAhead) { this.ifileReadAheadLength = conf.getInt(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT); } else { this.ifileReadAheadLength = 0; } Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext); FileSystem localFS = FileSystem.getLocal(this.conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS); // TODO TEZ Get rid of Map / Reduce references. 
TezCounter shuffledMapsCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS); TezCounter reduceShuffleBytes = inputContext.getCounters().findCounter(TaskCounter.REDUCE_SHUFFLE_BYTES); TezCounter failedShuffleCounter = inputContext.getCounters().findCounter(TaskCounter.FAILED_SHUFFLE); TezCounter spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS); TezCounter reduceCombineInputCounter = inputContext.getCounters() .findCounter(TaskCounter.COMBINE_INPUT_RECORDS); TezCounter mergedMapOutputsCounter = inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS); LOG.info("Shuffle assigned with " + numInputs + " inputs" + ", codec: " + (codec == null ? "None" : codec.getClass().getName()) + "ifileReadAhead: " + ifileReadAhead); scheduler = new ShuffleScheduler(this.inputContext, this.conf, this.numInputs, this, shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter); eventHandler = new ShuffleInputEventHandler(inputContext, scheduler); merger = new MergeManager(this.conf, localFS, localDirAllocator, inputContext, combiner, spilledRecordsCounter, reduceCombineInputCounter, mergedMapOutputsCounter, this); }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager.java
License:Apache License
public ShuffleManager(InputContext inputContext, Configuration conf, int numInputs, int bufferSize, boolean ifileReadAheadEnabled, int ifileReadAheadLength, CompressionCodec codec, FetchedInputAllocator inputAllocator) throws IOException { this.inputContext = inputContext; this.numInputs = numInputs; this.shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS); this.failedShufflesCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS); this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES); this.decompressedDataSizeCounter = inputContext.getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED); this.bytesShuffledToDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK); this.bytesShuffledToMemCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM); this.bytesShuffledDirectDiskCounter = inputContext.getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT); this.ifileBufferSize = bufferSize; this.ifileReadAhead = ifileReadAheadEnabled; this.ifileReadAheadLength = ifileReadAheadLength; this.codec = codec; this.inputManager = inputAllocator; this.localDiskFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH_DEFAULT); this.sharedFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH, TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH_DEFAULT); this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME); this.firstEventReceived = inputContext.getCounters().findCounter(TaskCounter.FIRST_EVENT_RECEIVED); this.lastEventReceived = inputContext.getCounters().findCounter(TaskCounter.LAST_EVENT_RECEIVED); this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); completedInputSet = 
Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs)); completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs); knownSrcHosts = new ConcurrentHashMap<String, InputHost>(); pendingHosts = new LinkedBlockingQueue<InputHost>(); obsoletedInputs = Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>()); runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>()); int maxConfiguredFetchers = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT); this.numFetchers = Math.min(maxConfiguredFetchers, numInputs); ExecutorService fetcherRawExecutor = Executors.newFixedThreadPool(numFetchers, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d").build()); this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor); ExecutorService schedulerRawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]").build()); this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor); this.schedulerCallable = new RunShuffleCallable(conf); this.startTime = System.currentTimeMillis(); this.lastProgressTime = startTime; SecretKey shuffleSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes( inputContext.getServiceConsumerMetaData(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID)); this.jobTokenSecretMgr = new JobTokenSecretManager(shuffleSecret); httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf); this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); this.localDisks = Iterables.toArray(localDirAllocator.getAllLocalPathsToRead(".", conf), Path.class); Arrays.sort(this.localDisks); LOG.info(this.getClass().getSimpleName() + " : 
numInputs=" + numInputs + ", compressionCodec=" + (codec == null ? "NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers=" + numFetchers + ", ifileBufferSize=" + ifileBufferSize + ", ifileReadAheadEnabled=" + ifileReadAhead + ", ifileReadAheadLength=" + ifileReadAheadLength + ", " + "localDiskFetchEnabled=" + localDiskFetchEnabled + ", " + "sharedFetchEnabled=" + sharedFetchEnabled + ", " + httpConnectionParams.toString()); }
From source file:org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java
License:Apache License
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory, long memoryAvailable) { this.conf = conf; this.maxAvailableTaskMemory = maxTaskAvailableMemory; this.initialMemoryAvailable = memoryAvailable; this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier); this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); // Setup configuration final float maxInMemCopyUse = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse); }/* ww w .j a v a2 s . co m*/ long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse); if (memReq <= this.initialMemoryAvailable) { this.memoryLimit = memReq; } else { this.memoryLimit = initialMemoryAvailable; } LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } //TODO: cap it to MAX_VALUE until MemoryFetchedInput can handle > 2 GB this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent), Integer.MAX_VALUE); LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: " + this.maxSingleShuffleLimit); }
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.java
License:Apache License
/**
 * Looks up the readable local path of a task output file.
 *
 * <p>The relative path is built as
 * {@code <task output dir>/<pathComponent>/<task output file name><suffix>}
 * and resolved through a {@link LocalDirAllocator}.
 *
 * @param pathComponent directory component placed under the task output dir
 * @param suffix        appended to the file name; {@code null} is treated as empty
 * @return the path returned by {@code LocalDirAllocator.getLocalPathToRead}
 * @throws IOException propagated from the allocator lookup
 */
@VisibleForTesting
protected Path getShuffleInputFileName(String pathComponent, String suffix) throws IOException {
  LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

  final String effectiveSuffix = (suffix == null) ? "" : suffix;
  final String outputDir =
      Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + Path.SEPARATOR + pathComponent;
  final String relativePath = outputDir + Path.SEPARATOR
      + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING + effectiveSuffix;

  return allocator.getLocalPathToRead(relativePath, conf);
}
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle.java
License:Apache License
public Shuffle(InputContext inputContext, Configuration conf, int numInputs, long initialMemoryAvailable) throws IOException { this.inputContext = inputContext; this.conf = conf; this.httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf); this.metrics = new ShuffleClientMetrics(inputContext.getDAGName(), inputContext.getTaskVertexName(), inputContext.getTaskIndex(), this.conf, UserGroupInformation.getCurrentUser().getShortUserName()); this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); this.jobTokenSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes( inputContext.getServiceConsumerMetaData(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID)); this.jobTokenSecretMgr = new JobTokenSecretManager(jobTokenSecret); if (ConfigUtils.isIntermediateInputCompressed(conf)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, conf); } else {/* ww w . ja va 2 s . c o m*/ codec = null; } this.ifileReadAhead = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); if (this.ifileReadAhead) { this.ifileReadAheadLength = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT); } else { this.ifileReadAheadLength = 0; } Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext); FileSystem localFS = FileSystem.getLocal(this.conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); // TODO TEZ Get rid of Map / Reduce references. 
TezCounter shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS); TezCounter reduceShuffleBytes = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES); TezCounter reduceDataSizeDecompressed = inputContext.getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED); TezCounter failedShuffleCounter = inputContext.getCounters() .findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS); TezCounter spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS); TezCounter reduceCombineInputCounter = inputContext.getCounters() .findCounter(TaskCounter.COMBINE_INPUT_RECORDS); TezCounter mergedMapOutputsCounter = inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS); TezCounter bytesShuffedToDisk = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK); TezCounter bytesShuffedToDiskDirect = inputContext.getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT); TezCounter bytesShuffedToMem = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM); LOG.info("Shuffle assigned with " + numInputs + " inputs" + ", codec: " + (codec == null ? 
"None" : codec.getClass().getName()) + "ifileReadAhead: " + ifileReadAhead); boolean sslShuffle = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL_DEFAULT); startTime = System.currentTimeMillis(); scheduler = new ShuffleScheduler(this.inputContext, this.conf, numInputs, this, shuffledInputsCounter, reduceShuffleBytes, reduceDataSizeDecompressed, failedShuffleCounter, bytesShuffedToDisk, bytesShuffedToDiskDirect, bytesShuffedToMem, startTime); this.mergePhaseTime = inputContext.getCounters().findCounter(TaskCounter.MERGE_PHASE_TIME); this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME); merger = new MergeManager(this.conf, localFS, localDirAllocator, inputContext, combiner, spilledRecordsCounter, reduceCombineInputCounter, mergedMapOutputsCounter, this, initialMemoryAvailable, codec, ifileReadAhead, ifileReadAheadLength); eventHandler = new ShuffleInputEventHandlerOrderedGrouped(inputContext, scheduler, sslShuffle); ExecutorService rawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("ShuffleAndMergeRunner [" + srcNameTrimmed + "]").build()); int configuredNumFetchers = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT); numFetchers = Math.min(configuredNumFetchers, numInputs); LOG.info("Num fetchers being started: " + numFetchers); fetchers = Lists.newArrayListWithCapacity(numFetchers); localDiskFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH_DEFAULT); executor = MoreExecutors.listeningDecorator(rawExecutor); runShuffleCallable = new RunShuffleCallable(); }
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.TestMergeManager.java
License:Apache License
@Test(timeout = 10000) public void testConfigs() throws IOException { long maxTaskMem = 8192 * 1024 * 1024l; //Test Shuffle fetch buffer and post merge buffer percentage Configuration conf = new TezConfiguration(defaultConf); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.8f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f); Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) == 6871947776l); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.5f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f); Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.9f); Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.1f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.1f); Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) < Integer.MAX_VALUE); try {// w ww . j a va 2s . 
c o m conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 2.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -2.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong post merge buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, -1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong post merge buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong shuffle fetch buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } try { conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -1.4f); MergeManager.getInitialMemoryRequirement(conf, maxTaskMem); Assert.fail("Should have thrown wrong shuffle fetch buffer percent configuration exception"); } catch (IllegalArgumentException ie) { } //test post merge mem limit conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.8f); FileSystem localFs = FileSystem.getLocal(conf); LocalDirAllocator localDirAllocator = 
new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString(), maxTaskMem); ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class); long initialMemoryAvailable = (long) (maxTaskMem * 0.8); MergeManager mergeManager = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1); Assert.assertTrue(mergeManager.postMergeMemLimit > Integer.MAX_VALUE); initialMemoryAvailable = 200 * 1024 * 1024l; //initial mem < memlimit mergeManager = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1); Assert.assertTrue(mergeManager.postMergeMemLimit == initialMemoryAvailable); }
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.TestMergeManager.java
License:Apache License
@Test(timeout = 10000) public void testLocalDiskMergeMultipleTasks() throws IOException { Configuration conf = new TezConfiguration(defaultConf); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName()); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName()); Path localDir = new Path(workDir, "local"); Path srcDir = new Path(workDir, "srcData"); localFs.mkdirs(localDir);//from w ww . java 2 s.com localFs.mkdirs(srcDir); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString()); FileSystem localFs = FileSystem.getLocal(conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString()); InputContext t1inputContext = createMockInputContext(UUID.randomUUID().toString()); ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class); ExceptionReporter t1exceptionReporter = mock(ExceptionReporter.class); MergeManager t0mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, 2000000, null, false, -1); MergeManager t0mergeManager = spy(t0mergeManagerReal); MergeManager t1mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t1inputContext, null, null, null, null, t1exceptionReporter, 2000000, null, false, -1); MergeManager t1mergeManager = spy(t1mergeManagerReal); // Partition 0 Keys 0-2, Partition 1 Keys 3-5 SrcFileInfo src1Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 0); // Partition 0 Keys 6-8, Partition 1 Keys 9-11 SrcFileInfo src2Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 6); // Simulating Task 0 fetches partition 0. (targetIndex = 0,1) // Simulating Task 1 fetches partition 1. 
(targetIndex = 0,1) InputAttemptIdentifier t0Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName()); InputAttemptIdentifier t0Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName()); InputAttemptIdentifier t1Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName()); InputAttemptIdentifier t1Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName()); MapOutput t0MapOutput0 = getMapOutputForDirectDiskFetch(t0Identifier0, src1Info.path, src1Info.indexedRecords[0], t0mergeManager); MapOutput t0MapOutput1 = getMapOutputForDirectDiskFetch(t0Identifier1, src2Info.path, src2Info.indexedRecords[0], t0mergeManager); MapOutput t1MapOutput0 = getMapOutputForDirectDiskFetch(t1Identifier0, src1Info.path, src1Info.indexedRecords[1], t1mergeManager); MapOutput t1MapOutput1 = getMapOutputForDirectDiskFetch(t1Identifier1, src2Info.path, src2Info.indexedRecords[1], t1mergeManager); t0MapOutput0.commit(); t0MapOutput1.commit(); verify(t0mergeManager).closeOnDiskFile(t0MapOutput0.getOutputPath()); verify(t0mergeManager).closeOnDiskFile(t0MapOutput1.getOutputPath()); // Run the OnDiskMerge via MergeManager // Simulate the thread invocation - remove files, and invoke merge List<FileChunk> t0MergeFiles = new LinkedList<FileChunk>(); t0MergeFiles.addAll(t0mergeManager.onDiskMapOutputs); t0mergeManager.onDiskMapOutputs.clear(); t0mergeManager.onDiskMerger.merge(t0MergeFiles); Assert.assertEquals(1, t0mergeManager.onDiskMapOutputs.size()); t1MapOutput0.commit(); t1MapOutput1.commit(); verify(t1mergeManager).closeOnDiskFile(t1MapOutput0.getOutputPath()); verify(t1mergeManager).closeOnDiskFile(t1MapOutput1.getOutputPath()); // Run the OnDiskMerge via MergeManager // Simulate the thread invocation - remove files, and invoke merge List<FileChunk> t1MergeFiles = new LinkedList<FileChunk>(); t1MergeFiles.addAll(t1mergeManager.onDiskMapOutputs); t1mergeManager.onDiskMapOutputs.clear(); t1mergeManager.onDiskMerger.merge(t1MergeFiles); 
Assert.assertEquals(1, t1mergeManager.onDiskMapOutputs.size()); Assert.assertNotEquals(t0mergeManager.onDiskMapOutputs.iterator().next().getPath(), t1mergeManager.onDiskMapOutputs.iterator().next().getPath()); Assert.assertTrue(t0mergeManager.onDiskMapOutputs.iterator().next().getPath().toString() .contains(t0inputContext.getUniqueIdentifier())); Assert.assertTrue(t1mergeManager.onDiskMapOutputs.iterator().next().getPath().toString() .contains(t1inputContext.getUniqueIdentifier())); }
From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java
License:Apache License
/** * create inmemory segments/*w w w . j a v a 2 s .c om*/ * * @return * @throws IOException */ public List<TezMerger.Segment> createInMemStreams() throws IOException { int numberOfStreams = Math.max(2, rnd.nextInt(10)); LOG.info("No of streams : " + numberOfStreams); SerializationFactory serializationFactory = new SerializationFactory(conf); Serializer keySerializer = serializationFactory.getSerializer(keyClass); Serializer valueSerializer = serializationFactory.getSerializer(valClass); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); InputContext context = createTezInputContext(); MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator, context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1); DataOutputBuffer keyBuf = new DataOutputBuffer(); DataOutputBuffer valBuf = new DataOutputBuffer(); DataInputBuffer keyIn = new DataInputBuffer(); DataInputBuffer valIn = new DataInputBuffer(); keySerializer.open(keyBuf); valueSerializer.open(valBuf); List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>(); for (int i = 0; i < numberOfStreams; i++) { BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024); InMemoryWriter writer = new InMemoryWriter(bout); Map<Writable, Writable> data = createData(); //write data for (Map.Entry<Writable, Writable> entry : data.entrySet()) { keySerializer.serialize(entry.getKey()); valueSerializer.serialize(entry.getValue()); keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength()); valIn.reset(valBuf.getData(), 0, valBuf.getLength()); writer.append(keyIn, valIn); originalData.put(entry.getKey(), entry.getValue()); keyBuf.reset(); valBuf.reset(); keyIn.reset(); valIn.reset(); } IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0, bout.getBuffer().length); segments.add(new TezMerger.Segment(reader, true)); data.clear(); writer.close(); } return segments; }
From source file:org.apache.tez.runtime.library.shuffle.common.impl.ShuffleManager.java
License:Apache License
public ShuffleManager(InputContext inputContext, Configuration conf, int numInputs, int bufferSize, boolean ifileReadAheadEnabled, int ifileReadAheadLength, CompressionCodec codec, FetchedInputAllocator inputAllocator) throws IOException { this.inputContext = inputContext; this.numInputs = numInputs; this.shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS); this.failedShufflesCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS); this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES); this.decompressedDataSizeCounter = inputContext.getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED); this.bytesShuffledToDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK); this.bytesShuffledToMemCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM); this.bytesShuffledDirectDiskCounter = inputContext.getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT); this.ifileBufferSize = bufferSize; this.ifileReadAhead = ifileReadAheadEnabled; this.ifileReadAheadLength = ifileReadAheadLength; this.codec = codec; this.inputManager = inputAllocator; this.localDiskFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH_DEFAULT); this.sharedFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH, TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH_DEFAULT); this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); completedInputSet = Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs)); completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs); knownSrcHosts = new ConcurrentHashMap<String, InputHost>(); pendingHosts = new LinkedBlockingQueue<InputHost>(); obsoletedInputs = 
Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>()); runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>()); int maxConfiguredFetchers = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT); this.numFetchers = Math.min(maxConfiguredFetchers, numInputs); ExecutorService fetcherRawExecutor = Executors.newFixedThreadPool(numFetchers, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d " + localhostName).build()); this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor); ExecutorService schedulerRawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]").build()); this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor); this.schedulerCallable = new RunShuffleCallable(conf); this.startTime = System.currentTimeMillis(); this.lastProgressTime = startTime; this.shuffleSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes( inputContext.getServiceConsumerMetaData(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID)); httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf); this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); this.localDisks = Iterables.toArray(localDirAllocator.getAllLocalPathsToRead(".", conf), Path.class); Arrays.sort(this.localDisks); LOG.info(this.getClass().getSimpleName() + " : numInputs=" + numInputs + ", compressionCodec=" + (codec == null ? 
"NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers=" + numFetchers + ", ifileBufferSize=" + ifileBufferSize + ", ifileReadAheadEnabled=" + ifileReadAhead + ", ifileReadAheadLength=" + ifileReadAheadLength + ", " + "localDiskFetchEnabled=" + localDiskFetchEnabled + ", " + "sharedFetchEnabled=" + sharedFetchEnabled + ", " + httpConnectionParams.toString()); }
From source file:org.apache.tez.runtime.library.shuffle.common.impl.SimpleFetchedInputAllocator.java
License:Apache License
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory, long memoryAvailable) { this.conf = conf; this.maxAvailableTaskMemory = maxTaskAvailableMemory; this.initialMemoryAvailable = memoryAvailable; this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier); this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); // Setup configuration final float maxInMemCopyUse = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse); }//from ww w. j a va 2 s .c o m long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse); if (memReq <= this.initialMemoryAvailable) { this.memoryLimit = memReq; } else { this.memoryLimit = initialMemoryAvailable; } LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent); LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: " + this.maxSingleShuffleLimit); }