Example usage for org.apache.hadoop.fs LocalDirAllocator LocalDirAllocator

List of usage examples for org.apache.hadoop.fs LocalDirAllocator LocalDirAllocator

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.LocalDirAllocator constructor LocalDirAllocator(String contextCfgItemName).

Prototype

public LocalDirAllocator(String contextCfgItemName) 

Source Link

Document

Create an allocator object

Usage

From source file:org.apache.tez.runtime.library.common.shuffle.impl.Shuffle.java

License:Apache License

/**
 * Builds the shuffle for a single input. Reads the compression and IFile read-ahead settings
 * from {@code conf}, looks up the task counters, and creates the scheduler, the input event
 * handler and the merge manager.
 *
 * @param inputContext context of the input being shuffled; source of the DAG and vertex names,
 *                     the task index, the counters and the shuffle service metadata
 * @param conf         configuration the shuffle settings are read from
 * @param numInputs    number of inputs, handed to the scheduler
 * @throws IOException propagated from the calls made during set-up
 */
public Shuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException {
    this.inputContext = inputContext;
    this.conf = conf;
    this.metrics = new ShuffleClientMetrics(inputContext.getDAGName(), inputContext.getTaskVertexName(),
            inputContext.getTaskIndex(), this.conf, UserGroupInformation.getCurrentUser().getShortUserName());

    this.numInputs = numInputs;

    this.jobTokenSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes(
            inputContext.getServiceConsumerMetaData(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID));

    // A codec is only instantiated when intermediate input compression is enabled.
    if (ConfigUtils.isIntermediateInputCompressed(conf)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    } else {
        codec = null;
    }

    // The read-ahead length is only looked up when read-ahead is enabled; otherwise it is 0.
    this.ifileReadAhead = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
            TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
    if (this.ifileReadAhead) {
        this.ifileReadAheadLength = conf.getInt(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
                TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
    } else {
        this.ifileReadAheadLength = 0;
    }

    Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext);

    FileSystem localFS = FileSystem.getLocal(this.conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

    // TODO TEZ Get rid of Map / Reduce references.
    TezCounter shuffledMapsCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS);
    TezCounter reduceShuffleBytes = inputContext.getCounters().findCounter(TaskCounter.REDUCE_SHUFFLE_BYTES);
    TezCounter failedShuffleCounter = inputContext.getCounters().findCounter(TaskCounter.FAILED_SHUFFLE);
    TezCounter spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter reduceCombineInputCounter = inputContext.getCounters()
            .findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
    TezCounter mergedMapOutputsCounter = inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS);

    // The separator before "ifileReadAhead" keeps the codec name and the flag from running together.
    LOG.info("Shuffle assigned with " + numInputs + " inputs" + ", codec: "
            + (codec == null ? "None" : codec.getClass().getName()) + ", ifileReadAhead: " + ifileReadAhead);

    scheduler = new ShuffleScheduler(this.inputContext, this.conf, this.numInputs, this, shuffledMapsCounter,
            reduceShuffleBytes, failedShuffleCounter);
    eventHandler = new ShuffleInputEventHandler(inputContext, scheduler);
    merger = new MergeManager(this.conf, localFS, localDirAllocator, inputContext, combiner,
            spilledRecordsCounter, reduceCombineInputCounter, mergedMapOutputsCounter, this);
}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager.java

License:Apache License

/**
 * Initialises the shuffle manager: looks up the task counters, stores the IFile and codec
 * settings it is given, reads the local/shared fetch flags from {@code conf}, creates the
 * fetcher and scheduler executors, and resolves the local directories.
 *
 * @param inputContext          context of the input; source of the counters, the source vertex
 *                              name and the shuffle service metadata
 * @param conf                  configuration the fetch flags and the parallel-copies setting are
 *                              read from
 * @param numInputs             number of inputs; also sizes the completed-input collections and
 *                              bounds the number of fetchers
 * @param bufferSize            stored as the IFile buffer size
 * @param ifileReadAheadEnabled stored as the IFile read-ahead flag
 * @param ifileReadAheadLength  stored as the IFile read-ahead length
 * @param codec                 compression codec, or {@code null} (logged as "NoCompressionCodec")
 * @param inputAllocator        stored as the input manager
 * @throws IOException propagated from the calls made during set-up
 */
public ShuffleManager(InputContext inputContext, Configuration conf, int numInputs, int bufferSize,
        boolean ifileReadAheadEnabled, int ifileReadAheadLength, CompressionCodec codec,
        FetchedInputAllocator inputAllocator) throws IOException {
    this.inputContext = inputContext;
    this.numInputs = numInputs;

    // Counters describing how many inputs and bytes were shuffled, and where the bytes went.
    this.shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
    this.failedShufflesCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS);
    this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES);
    this.decompressedDataSizeCounter = inputContext.getCounters()
            .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED);
    this.bytesShuffledToDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK);
    this.bytesShuffledToMemCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM);
    this.bytesShuffledDirectDiskCounter = inputContext.getCounters()
            .findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT);

    this.ifileBufferSize = bufferSize;
    this.ifileReadAhead = ifileReadAheadEnabled;
    this.ifileReadAheadLength = ifileReadAheadLength;
    this.codec = codec;
    this.inputManager = inputAllocator;
    this.localDiskFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH,
            TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH_DEFAULT);
    this.sharedFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH,
            TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH_DEFAULT);

    // Timing-related counters.
    this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME);
    this.firstEventReceived = inputContext.getCounters().findCounter(TaskCounter.FIRST_EVENT_RECEIVED);
    this.lastEventReceived = inputContext.getCounters().findCounter(TaskCounter.LAST_EVENT_RECEIVED);

    // Used below in the names of the executor threads.
    this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName());

    // Both numInputs-sized collections are created here; the queue is bounded at numInputs entries.
    completedInputSet = Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs));
    completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs);
    knownSrcHosts = new ConcurrentHashMap<String, InputHost>();
    pendingHosts = new LinkedBlockingQueue<InputHost>();
    obsoletedInputs = Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>());
    runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>());

    int maxConfiguredFetchers = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);

    // Never more fetchers than inputs.
    this.numFetchers = Math.min(maxConfiguredFetchers, numInputs);

    // Fixed-size pool of daemon threads, one per fetcher.
    ExecutorService fetcherRawExecutor = Executors.newFixedThreadPool(numFetchers, new ThreadFactoryBuilder()
            .setDaemon(true).setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d").build());
    this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor);

    // Single daemon thread for the scheduler callable created below.
    ExecutorService schedulerRawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder()
            .setDaemon(true).setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]").build());
    this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor);
    this.schedulerCallable = new RunShuffleCallable(conf);

    this.startTime = System.currentTimeMillis();
    this.lastProgressTime = startTime;

    // The shuffle secret is derived from the metadata of the Tez shuffle handler service.
    SecretKey shuffleSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes(
            inputContext.getServiceConsumerMetaData(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID));
    this.jobTokenSecretMgr = new JobTokenSecretManager(shuffleSecret);
    httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf);

    this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw();

    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Resolve "." against the allocator's local directories and keep the results sorted.
    this.localDisks = Iterables.toArray(localDirAllocator.getAllLocalPathsToRead(".", conf), Path.class);

    Arrays.sort(this.localDisks);

    LOG.info(this.getClass().getSimpleName() + " : numInputs=" + numInputs + ", compressionCodec="
            + (codec == null ? "NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers="
            + numFetchers + ", ifileBufferSize=" + ifileBufferSize + ", ifileReadAheadEnabled=" + ifileReadAhead
            + ", ifileReadAheadLength=" + ifileReadAheadLength + ", " + "localDiskFetchEnabled="
            + localDiskFetchEnabled + ", " + "sharedFetchEnabled=" + sharedFetchEnabled + ", "
            + httpConnectionParams.toString());
}

From source file:org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.java

License:Apache License

/**
 * Creates the allocator and derives its two memory limits from the configuration:
 * the overall in-memory limit and the largest size a single shuffle may use.
 *
 * @param uniqueIdentifier       identifier passed to the {@code TezTaskOutputFiles} file-name allocator
 * @param conf                   configuration the fetch-buffer and memory-limit percentages are read from
 * @param maxTaskAvailableMemory task memory used (capped at {@code Integer.MAX_VALUE}) as the default
 *                               passed to {@code conf.getLong} for {@code Constants.TEZ_RUNTIME_TASK_MEMORY}
 * @param memoryAvailable        upper bound for the overall memory limit
 * @throws IllegalArgumentException if the fetch buffer percent lies outside {@code [0, 1]} or the
 *                                  memory limit percent lies outside {@code (0, 1]}
 */
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory,
        long memoryAvailable) {
    this.conf = conf;
    this.maxAvailableTaskMemory = maxTaskAvailableMemory;
    this.initialMemoryAvailable = memoryAvailable;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Setup configuration
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse);

    // Never allocate more than what is actually available.
    this.memoryLimit = Math.min(memReq, this.initialMemoryAvailable);

    LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    //TODO: cap it to MAX_VALUE until MemoryFetchedInput can handle > 2 GB
    // The product is converted to long BEFORE taking the minimum. Taking the minimum on the
    // float product would widen Integer.MAX_VALUE to float, which rounds it up to 2^31, so
    // the resulting limit could exceed Integer.MAX_VALUE by one.
    this.maxSingleShuffleLimit = Math.min((long) (memoryLimit * singleShuffleMemoryLimitPercent),
            Integer.MAX_VALUE);

    LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.java

License:Apache License

/**
 * Resolves the readable local path of a shuffle input file. The relative path is built from
 * the task output directory, {@code pathComponent} and the task output file name followed by
 * the optional suffix, and is then looked up through a {@code LocalDirAllocator}.
 *
 * @param pathComponent directory component placed between the output directory and the file name
 * @param suffix        text appended to the file name; {@code null} is treated as empty
 * @return the path returned by the allocator for the relative path
 * @throws IOException propagated from the allocator lookup
 */
@VisibleForTesting
protected Path getShuffleInputFileName(String pathComponent, String suffix) throws IOException {
    String fileSuffix = "";
    if (suffix != null) {
        fileSuffix = suffix;
    }

    final StringBuilder relativePath = new StringBuilder();
    relativePath.append(Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR)
            .append(Path.SEPARATOR)
            .append(pathComponent)
            .append(Path.SEPARATOR)
            .append(Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING)
            .append(fileSuffix);

    final LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    return allocator.getLocalPathToRead(relativePath.toString(), conf);
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle.java

License:Apache License

/**
 * Builds the ordered-grouped shuffle for a single input. Reads the compression, IFile
 * read-ahead, SSL, parallel-copies and local-fetch settings from {@code conf}, looks up the
 * task counters, and creates the scheduler, the merge manager, the input event handler and
 * the single-threaded executor for the shuffle callable.
 *
 * @param inputContext           context of the input being shuffled; source of the DAG and vertex
 *                               names, the task index, the counters and the shuffle service metadata
 * @param conf                   configuration the shuffle settings are read from
 * @param numInputs              number of inputs; handed to the scheduler and used to bound the
 *                               number of fetchers
 * @param initialMemoryAvailable memory figure handed to the merge manager
 * @throws IOException propagated from the calls made during set-up
 */
public Shuffle(InputContext inputContext, Configuration conf, int numInputs, long initialMemoryAvailable)
        throws IOException {
    this.inputContext = inputContext;
    this.conf = conf;
    this.httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf);
    this.metrics = new ShuffleClientMetrics(inputContext.getDAGName(), inputContext.getTaskVertexName(),
            inputContext.getTaskIndex(), this.conf, UserGroupInformation.getCurrentUser().getShortUserName());

    this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName());

    this.jobTokenSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes(
            inputContext.getServiceConsumerMetaData(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID));
    this.jobTokenSecretMgr = new JobTokenSecretManager(jobTokenSecret);

    // A codec is only instantiated when intermediate input compression is enabled.
    if (ConfigUtils.isIntermediateInputCompressed(conf)) {
        Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf,
                DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
    } else {
        codec = null;
    }

    // The read-ahead length is only looked up when read-ahead is enabled; otherwise it is 0.
    this.ifileReadAhead = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
            TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
    if (this.ifileReadAhead) {
        this.ifileReadAheadLength = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
                TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
    } else {
        this.ifileReadAheadLength = 0;
    }

    Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext);

    FileSystem localFS = FileSystem.getLocal(this.conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // TODO TEZ Get rid of Map / Reduce references.
    TezCounter shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
    TezCounter reduceShuffleBytes = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES);
    TezCounter reduceDataSizeDecompressed = inputContext.getCounters()
            .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED);
    TezCounter failedShuffleCounter = inputContext.getCounters()
            .findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS);
    TezCounter spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter reduceCombineInputCounter = inputContext.getCounters()
            .findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
    TezCounter mergedMapOutputsCounter = inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS);
    TezCounter bytesShuffledToDisk = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK);
    TezCounter bytesShuffledToDiskDirect = inputContext.getCounters()
            .findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT);
    TezCounter bytesShuffledToMem = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM);

    // The separator before "ifileReadAhead" keeps the codec name and the flag from running together.
    LOG.info("Shuffle assigned with " + numInputs + " inputs" + ", codec: "
            + (codec == null ? "None" : codec.getClass().getName()) + ", ifileReadAhead: " + ifileReadAhead);

    boolean sslShuffle = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL_DEFAULT);
    startTime = System.currentTimeMillis();
    scheduler = new ShuffleScheduler(this.inputContext, this.conf, numInputs, this, shuffledInputsCounter,
            reduceShuffleBytes, reduceDataSizeDecompressed, failedShuffleCounter, bytesShuffledToDisk,
            bytesShuffledToDiskDirect, bytesShuffledToMem, startTime);
    this.mergePhaseTime = inputContext.getCounters().findCounter(TaskCounter.MERGE_PHASE_TIME);
    this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME);

    merger = new MergeManager(this.conf, localFS, localDirAllocator, inputContext, combiner,
            spilledRecordsCounter, reduceCombineInputCounter, mergedMapOutputsCounter, this,
            initialMemoryAvailable, codec, ifileReadAhead, ifileReadAheadLength);

    eventHandler = new ShuffleInputEventHandlerOrderedGrouped(inputContext, scheduler, sslShuffle);

    // Single daemon thread for the shuffle callable created at the end of this constructor.
    ExecutorService rawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true)
            .setNameFormat("ShuffleAndMergeRunner [" + srcNameTrimmed + "]").build());

    // Never more fetchers than inputs.
    int configuredNumFetchers = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);
    numFetchers = Math.min(configuredNumFetchers, numInputs);
    LOG.info("Num fetchers being started: " + numFetchers);
    fetchers = Lists.newArrayListWithCapacity(numFetchers);
    localDiskFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH,
            TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH_DEFAULT);

    executor = MoreExecutors.listeningDecorator(rawExecutor);
    runShuffleCallable = new RunShuffleCallable();
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.TestMergeManager.java

License:Apache License

/**
 * Exercises the memory-related configuration handling of {@code MergeManager}: the initial
 * memory requirement for several buffer-percent combinations, the rejection of out-of-range
 * percentages, and the resulting post-merge memory limit.
 *
 * @throws IOException propagated from the file system and merge manager set-up
 */
@Test(timeout = 10000)
public void testConfigs() throws IOException {
    long maxTaskMem = 8192 * 1024 * 1024L;

    //Test Shuffle fetch buffer and post merge buffer percentage
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.8f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) == 6871947776L);

    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.5f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.5f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE);

    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.9f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) > Integer.MAX_VALUE);

    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.1f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.1f);
    Assert.assertTrue(MergeManager.getInitialMemoryRequirement(conf, maxTaskMem) < Integer.MAX_VALUE);

    // Out-of-range percentages must be rejected. The configuration is mutated cumulatively,
    // exactly as in the sequence of checks below.
    // NOTE(review): when the post-merge checks run, the fetch buffer percent still holds the
    // invalid -2.4f set just before, so presumably either validation may be the one that
    // throws - confirm against MergeManager.getInitialMemoryRequirement.
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 2.4f,
            maxTaskMem, "Should have thrown wrong buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -2.4f,
            maxTaskMem, "Should have thrown wrong buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 1.4f,
            maxTaskMem, "Should have thrown wrong post merge buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, -1.4f,
            maxTaskMem, "Should have thrown wrong post merge buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 1.4f,
            maxTaskMem, "Should have thrown wrong shuffle fetch buffer percent configuration exception");
    assertPercentRejected(conf, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, -1.4f,
            maxTaskMem, "Should have thrown wrong shuffle fetch buffer percent configuration exception");

    //test post merge mem limit
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f);
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0.8f);
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString(), maxTaskMem);
    ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
    long initialMemoryAvailable = (long) (maxTaskMem * 0.8);
    MergeManager mergeManager = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null,
            null, null, t0exceptionReporter, initialMemoryAvailable, null, false, -1);
    Assert.assertTrue(mergeManager.postMergeMemLimit > Integer.MAX_VALUE);

    initialMemoryAvailable = 200 * 1024 * 1024L; //initial mem < memlimit
    mergeManager = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null,
            t0exceptionReporter, initialMemoryAvailable, null, false, -1);
    Assert.assertTrue(mergeManager.postMergeMemLimit == initialMemoryAvailable);
}

/**
 * Sets {@code key} to {@code value} on {@code conf} and fails the test with
 * {@code failureMessage} unless computing the initial memory requirement then throws an
 * {@code IllegalArgumentException}.
 */
private static void assertPercentRejected(Configuration conf, String key, float value, long maxTaskMem,
        String failureMessage) {
    try {
        conf.setFloat(key, value);
        MergeManager.getInitialMemoryRequirement(conf, maxTaskMem);
        Assert.fail(failureMessage);
    } catch (IllegalArgumentException expected) {
        // This exception is the outcome the test is looking for; nothing more to verify.
    }
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.TestMergeManager.java

License:Apache License

/**
 * Runs the on-disk merge for two simulated tasks that share one local directory and checks
 * that each ends up with exactly one merged output, that the two outputs have different
 * paths, and that each path contains the unique identifier of its own task.
 *
 * @throws IOException propagated from the file system and merge operations
 */
@Test(timeout = 10000)
public void testLocalDiskMergeMultipleTasks() throws IOException {
    final Configuration mergeConf = new TezConfiguration(defaultConf);
    mergeConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    mergeConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    mergeConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());

    final Path localDir = new Path(workDir, "local");
    final Path srcDir = new Path(workDir, "srcData");
    // The directories are created through the localFs declared outside this method.
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);

    mergeConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

    // File system obtained from the configuration that carries the local directory setting.
    final FileSystem taskFs = FileSystem.getLocal(mergeConf);
    final LocalDirAllocator dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    final InputContext task0Context = createMockInputContext(UUID.randomUUID().toString());
    final InputContext task1Context = createMockInputContext(UUID.randomUUID().toString());

    final ExceptionReporter task0Reporter = mock(ExceptionReporter.class);
    final ExceptionReporter task1Reporter = mock(ExceptionReporter.class);

    // Each merge manager is wrapped in a spy so the closeOnDiskFile calls can be verified.
    final MergeManager task0Merger = spy(new MergeManager(mergeConf, taskFs, dirAllocator, task0Context,
            null, null, null, null, task0Reporter, 2000000, null, false, -1));
    final MergeManager task1Merger = spy(new MergeManager(mergeConf, taskFs, dirAllocator, task1Context,
            null, null, null, null, task1Reporter, 2000000, null, false, -1));

    // Partition 0 Keys 0-2, Partition 1 Keys 3-5
    final Path firstSrcPath = new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out");
    final SrcFileInfo firstSrc = createFile(mergeConf, taskFs, firstSrcPath, 2, 3, 0);
    // Partition 0 Keys 6-8, Partition 1 Keys 9-11
    final Path secondSrcPath = new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out");
    final SrcFileInfo secondSrc = createFile(mergeConf, taskFs, secondSrcPath, 2, 3, 6);

    // Simulating Task 0 fetches partition 0. (targetIndex = 0,1)
    // Simulating Task 1 fetches partition 1. (targetIndex = 0,1)
    final InputAttemptIdentifier task0Id0 = new InputAttemptIdentifier(0, 0, firstSrc.path.getName());
    final InputAttemptIdentifier task0Id1 = new InputAttemptIdentifier(1, 0, secondSrc.path.getName());

    final InputAttemptIdentifier task1Id0 = new InputAttemptIdentifier(0, 0, firstSrc.path.getName());
    final InputAttemptIdentifier task1Id1 = new InputAttemptIdentifier(1, 0, secondSrc.path.getName());

    final MapOutput task0Output0 = getMapOutputForDirectDiskFetch(task0Id0, firstSrc.path,
            firstSrc.indexedRecords[0], task0Merger);
    final MapOutput task0Output1 = getMapOutputForDirectDiskFetch(task0Id1, secondSrc.path,
            secondSrc.indexedRecords[0], task0Merger);

    final MapOutput task1Output0 = getMapOutputForDirectDiskFetch(task1Id0, firstSrc.path,
            firstSrc.indexedRecords[1], task1Merger);
    final MapOutput task1Output1 = getMapOutputForDirectDiskFetch(task1Id1, secondSrc.path,
            secondSrc.indexedRecords[1], task1Merger);

    // Task 0: commit both outputs, then drive the on-disk merge by hand.
    task0Output0.commit();
    task0Output1.commit();
    verify(task0Merger).closeOnDiskFile(task0Output0.getOutputPath());
    verify(task0Merger).closeOnDiskFile(task0Output1.getOutputPath());
    // Simulate the merge thread: take the pending files away and invoke the merge directly.
    final List<FileChunk> task0Pending = new LinkedList<FileChunk>(task0Merger.onDiskMapOutputs);
    task0Merger.onDiskMapOutputs.clear();
    task0Merger.onDiskMerger.merge(task0Pending);
    Assert.assertEquals(1, task0Merger.onDiskMapOutputs.size());

    // Task 1: same sequence.
    task1Output0.commit();
    task1Output1.commit();
    verify(task1Merger).closeOnDiskFile(task1Output0.getOutputPath());
    verify(task1Merger).closeOnDiskFile(task1Output1.getOutputPath());
    // Simulate the merge thread: take the pending files away and invoke the merge directly.
    final List<FileChunk> task1Pending = new LinkedList<FileChunk>(task1Merger.onDiskMapOutputs);
    task1Merger.onDiskMapOutputs.clear();
    task1Merger.onDiskMerger.merge(task1Pending);
    Assert.assertEquals(1, task1Merger.onDiskMapOutputs.size());

    // The merged outputs must be distinct files, each tagged with its own task identifier.
    final FileChunk task0Merged = task0Merger.onDiskMapOutputs.iterator().next();
    final FileChunk task1Merged = task1Merger.onDiskMapOutputs.iterator().next();
    Assert.assertNotEquals(task0Merged.getPath(), task1Merged.getPath());

    Assert.assertTrue(task0Merged.getPath().toString().contains(task0Context.getUniqueIdentifier()));
    Assert.assertTrue(task1Merged.getPath().toString().contains(task1Context.getUniqueIdentifier()));
}

From source file:org.apache.tez.runtime.library.common.TestValuesIterator.java

License:Apache License

/**
 * create inmemory segments/*w w  w .  j  a  v  a 2 s .c om*/
 *
 * @return
 * @throws IOException
 */
public List<TezMerger.Segment> createInMemStreams() throws IOException {
    int numberOfStreams = Math.max(2, rnd.nextInt(10));
    LOG.info("No of streams : " + numberOfStreams);

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer keySerializer = serializationFactory.getSerializer(keyClass);
    Serializer valueSerializer = serializationFactory.getSerializer(valClass);

    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext context = createTezInputContext();
    MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator, context, null, null, null, null,
            null, 1024 * 1024 * 10, null, false, -1);

    DataOutputBuffer keyBuf = new DataOutputBuffer();
    DataOutputBuffer valBuf = new DataOutputBuffer();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    keySerializer.open(keyBuf);
    valueSerializer.open(valBuf);

    List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
    for (int i = 0; i < numberOfStreams; i++) {
        BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
        InMemoryWriter writer = new InMemoryWriter(bout);
        Map<Writable, Writable> data = createData();
        //write data
        for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
            keySerializer.serialize(entry.getKey());
            valueSerializer.serialize(entry.getValue());
            keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
            valIn.reset(valBuf.getData(), 0, valBuf.getLength());
            writer.append(keyIn, valIn);
            originalData.put(entry.getKey(), entry.getValue());
            keyBuf.reset();
            valBuf.reset();
            keyIn.reset();
            valIn.reset();
        }
        IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0,
                bout.getBuffer().length);
        segments.add(new TezMerger.Segment(reader, true));

        data.clear();
        writer.close();
    }
    return segments;
}

From source file:org.apache.tez.runtime.library.shuffle.common.impl.ShuffleManager.java

License:Apache License

/**
 * Initialises the shuffle manager: looks up the task counters, stores the IFile and codec
 * settings it is given, reads the local/shared fetch flags from {@code conf}, creates the
 * fetcher and scheduler executors, and resolves the local directories.
 *
 * @param inputContext          context of the input; source of the counters, the source vertex
 *                              name and the shuffle service metadata
 * @param conf                  configuration the fetch flags and the parallel-copies setting are
 *                              read from
 * @param numInputs             number of inputs; also sizes the completed-input collections and
 *                              bounds the number of fetchers
 * @param bufferSize            stored as the IFile buffer size
 * @param ifileReadAheadEnabled stored as the IFile read-ahead flag
 * @param ifileReadAheadLength  stored as the IFile read-ahead length
 * @param codec                 compression codec, or {@code null} (logged as "NoCompressionCodec")
 * @param inputAllocator        stored as the input manager
 * @throws IOException propagated from the calls made during set-up
 */
public ShuffleManager(InputContext inputContext, Configuration conf, int numInputs, int bufferSize,
        boolean ifileReadAheadEnabled, int ifileReadAheadLength, CompressionCodec codec,
        FetchedInputAllocator inputAllocator) throws IOException {
    this.inputContext = inputContext;
    this.numInputs = numInputs;

    // Counters describing how many inputs and bytes were shuffled, and where the bytes went.
    this.shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
    this.failedShufflesCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS);
    this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES);
    this.decompressedDataSizeCounter = inputContext.getCounters()
            .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED);
    this.bytesShuffledToDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK);
    this.bytesShuffledToMemCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM);
    this.bytesShuffledDirectDiskCounter = inputContext.getCounters()
            .findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT);

    this.ifileBufferSize = bufferSize;
    this.ifileReadAhead = ifileReadAheadEnabled;
    this.ifileReadAheadLength = ifileReadAheadLength;
    this.codec = codec;
    this.inputManager = inputAllocator;
    this.localDiskFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH,
            TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH_DEFAULT);
    this.sharedFetchEnabled = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH,
            TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH_DEFAULT);

    // Used below in the names of the executor threads.
    this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName());

    // Both numInputs-sized collections are created here; the queue is bounded at numInputs entries.
    completedInputSet = Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs));
    completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs);
    knownSrcHosts = new ConcurrentHashMap<String, InputHost>();
    pendingHosts = new LinkedBlockingQueue<InputHost>();
    obsoletedInputs = Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>());
    runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>());

    int maxConfiguredFetchers = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);

    // Never more fetchers than inputs.
    this.numFetchers = Math.min(maxConfiguredFetchers, numInputs);

    // Fixed-size pool of daemon threads, one per fetcher; localhostName is appended to the thread name.
    ExecutorService fetcherRawExecutor = Executors.newFixedThreadPool(numFetchers, new ThreadFactoryBuilder()
            .setDaemon(true).setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d " + localhostName).build());
    this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor);

    // Single daemon thread for the scheduler callable created below.
    ExecutorService schedulerRawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder()
            .setDaemon(true).setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]").build());
    this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor);
    this.schedulerCallable = new RunShuffleCallable(conf);

    this.startTime = System.currentTimeMillis();
    this.lastProgressTime = startTime;

    // The shuffle secret is derived from the metadata of the Tez shuffle handler service.
    this.shuffleSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes(
            inputContext.getServiceConsumerMetaData(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID));
    httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf);

    this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw();

    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Resolve "." against the allocator's local directories and keep the results sorted.
    this.localDisks = Iterables.toArray(localDirAllocator.getAllLocalPathsToRead(".", conf), Path.class);

    Arrays.sort(this.localDisks);

    LOG.info(this.getClass().getSimpleName() + " : numInputs=" + numInputs + ", compressionCodec="
            + (codec == null ? "NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers="
            + numFetchers + ", ifileBufferSize=" + ifileBufferSize + ", ifileReadAheadEnabled=" + ifileReadAhead
            + ", ifileReadAheadLength=" + ifileReadAheadLength + ", " + "localDiskFetchEnabled="
            + localDiskFetchEnabled + ", " + "sharedFetchEnabled=" + sharedFetchEnabled + ", "
            + httpConnectionParams.toString());
}

From source file:org.apache.tez.runtime.library.shuffle.common.impl.SimpleFetchedInputAllocator.java

License:Apache License

/**
 * Creates the allocator and derives its two memory limits from the configuration:
 * the overall in-memory limit and the largest size a single shuffle may use.
 *
 * @param uniqueIdentifier       identifier passed to the {@code TezTaskOutputFiles} file-name allocator
 * @param conf                   configuration the fetch-buffer and memory-limit percentages are read from
 * @param maxTaskAvailableMemory task memory used (capped at {@code Integer.MAX_VALUE}) as the default
 *                               passed to {@code conf.getLong} for {@code Constants.TEZ_RUNTIME_TASK_MEMORY}
 * @param memoryAvailable        upper bound for the overall memory limit
 * @throws IllegalArgumentException if the fetch buffer percent lies outside {@code [0, 1]} or the
 *                                  memory limit percent lies outside {@code (0, 1]}
 */
public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory,
        long memoryAvailable) {
    this.conf = conf;
    this.maxAvailableTaskMemory = maxTaskAvailableMemory;
    this.initialMemoryAvailable = memoryAvailable;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Setup configuration
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse);

    // Never allocate more than what is actually available.
    this.memoryLimit = Math.min(memReq, this.initialMemoryAvailable);

    LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    // A configured task memory above 2 GB could otherwise push this limit past
    // Integer.MAX_VALUE, so it is capped there.
    // NOTE(review): presumably a single in-memory fetch cannot exceed that size - confirm
    // against the in-memory fetched input implementation.
    // The product is converted to long before the minimum is taken, because a float
    // comparison would round Integer.MAX_VALUE up to 2^31.
    this.maxSingleShuffleLimit = Math.min((long) (memoryLimit * singleShuffleMemoryLimitPercent),
            Integer.MAX_VALUE);

    LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}