Example usage for org.apache.hadoop.mapreduce TaskType MAP

List of usage examples for org.apache.hadoop.mapreduce TaskType MAP

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce TaskType MAP.

Prototype

TaskType MAP

To view the source code for org.apache.hadoop.mapreduce TaskType MAP, click the Source link.

Usage
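
Before the project-specific examples, here is a minimal sketch, not taken from any of the projects below, of the pattern most of them share: constructing a TaskAttemptID with TaskType.MAP and wrapping it in a TaskAttemptContextImpl so that an OutputFormat or RecordWriter can be exercised outside a running job. The identifier "demo", the class name and the helper name are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskTypeMapSketch {
    /** Builds a synthetic attempt context for a map task; useful when testing output formats. */
    public static TaskAttemptContext newMapAttemptContext(Configuration conf) {
        // jtIdentifier "demo", job 0, map task 0, attempt 0 -- arbitrary placeholder values.
        TaskAttemptID id = new TaskAttemptID("demo", 0, TaskType.MAP, 0, 0);
        return new TaskAttemptContextImpl(conf, id);
    }
}

The Jena and Mnemonic test cases further down construct their task attempt contexts the same way.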

From source file: org.apache.druid.indexer.updater.HadoopConverterJob.java

License: Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
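
The only place TaskType.MAP appears in the method above is the failure branch: because the job is map-only (setNumReduceTasks(0)), task diagnostics are requested for map tasks alone. Below is a stripped-down sketch of that pattern, assuming a Job that has already completed unsuccessfully; the class and method names are illustrative and logging is reduced to stderr.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;

public final class MapTaskDiagnostics {
    private MapTaskDiagnostics() {
    }

    /** Prints the diagnostics reported by every map task of the given job. */
    public static void logMapFailures(Job job) throws IOException, InterruptedException {
        TaskReport[] reports = job.getTaskReports(TaskType.MAP);
        if (reports == null) {
            return;
        }
        for (TaskReport report : reports) {
            System.err.printf("Error in task [%s] : %s%n", report.getTaskId(),
                    Arrays.toString(report.getDiagnostics()));
        }
    }
}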

From source file: org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceJobSuggestionContext.java

License: Apache License

private MapReduceJobSuggestionContext buildContext() {
    avgMapTimeInSec = avgReduceTimeInSec = avgShuffleTimeInSec = 0;
    numMaps = jobconf.getLong(NUM_MAPS, 0);
    numReduces = jobconf.getLong(NUM_REDUCES, 0);

    for (TaskAttemptExecutionAPIEntity attempt : job.getCompletedTaskAttemptsMap().values()) {
        String taskType = getTaskType(attempt);
        if (Constants.TaskType.MAP.toString().equalsIgnoreCase(taskType)) {
            long mapTime = attempt.getEndTime() - attempt.getStartTime();
            avgMapTimeInSec += mapTime;
            if (firstMap == null || firstMap.getStartTime() > attempt.getStartTime()) {
                firstMap = attempt;
            }
            if (lastMap == null || lastMap.getEndTime() < attempt.getEndTime()) {
                lastMap = attempt;
            }
            if (worstMap == null || (worstMap.getEndTime() - worstMap.getStartTime()) < mapTime) {
                worstMap = attempt;
            }
            long tmpMem = getMinimumIOSortMemory(attempt);
            if (tmpMem > minMapSpillMemBytes) {
                minMapSpillMemBytes = tmpMem;
            }
        } else if (TaskType.REDUCE.toString().equalsIgnoreCase(taskType)) {
            long shuffleTime = attempt.getShuffleFinishTime() - attempt.getStartTime();
            avgShuffleTimeInSec += shuffleTime;
            if (firstShuffle == null || firstShuffle.getStartTime() > attempt.getStartTime()) {
                firstShuffle = attempt;
            }
            if (lastShuffle == null || lastShuffle.getShuffleFinishTime() < attempt.getShuffleFinishTime()) {
                lastShuffle = attempt;
            }
            if (worstShuffle == null
                    || (worstShuffle.getShuffleFinishTime() - worstShuffle.getStartTime()) < shuffleTime) {
                worstShuffle = attempt;
            }

            long reduceTime = attempt.getEndTime() - attempt.getShuffleFinishTime();
            avgReduceTimeInSec += reduceTime;
            if (firstReduce == null || firstReduce.getStartTime() > attempt.getStartTime()) {
                firstReduce = attempt;
            }
            if (lastReduce == null || lastReduce.getEndTime() < attempt.getEndTime()) {
                lastReduce = attempt;
            }
            if (worstReduce == null
                    || (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) < reduceTime) {
                worstReduce = attempt;
            }
        }
    }
    if (numMaps > 0) {
        avgMapTimeInSec = avgMapTimeInSec / numMaps / DateTimeUtil.ONESECOND;
    }
    if (numReduces > 0) {
        avgReduceTimeInSec = avgReduceTimeInSec / numReduces / DateTimeUtil.ONESECOND;
        avgShuffleTimeInSec = avgShuffleTimeInSec / numReduces / DateTimeUtil.ONESECOND;
    }
    return this;
}

From source file: org.apache.hcatalog.shims.HCatHadoopShims23.java

License: Apache License

@Override
public TaskID createTaskID() {
    return new TaskID("", 0, TaskType.MAP, 0);
}

From source file: org.apache.hcatalog.shims.HCatHadoopShims23.java

License: Apache License

@Override
public TaskAttemptID createTaskAttemptID() {
    return new TaskAttemptID("", 0, TaskType.MAP, 0, 0);
}
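
Both shim methods above build placeholder IDs for a map task from an empty job-tracker identifier and all-zero numeric parts. Below is a small illustrative sketch, not part of HCatalog, that prints such IDs; on Hadoop 2.x the output is roughly task__0000_m_000000 and attempt__0000_m_000000_0, with the m segment contributed by TaskType.MAP.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class PlaceholderIdDemo {
    public static void main(String[] args) {
        // Same arguments as the shim methods above: empty jtIdentifier, job 0, task 0, attempt 0.
        TaskID taskId = new TaskID("", 0, TaskType.MAP, 0);
        TaskAttemptID attemptId = new TaskAttemptID("", 0, TaskType.MAP, 0, 0);

        // The 'm' segment in the printed form comes from TaskType.MAP; reduce tasks use 'r'.
        System.out.println(taskId);
        System.out.println(attemptId);
    }
}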

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param type Task type.
 * @return Hadoop task type.
 */
private TaskType taskType(HadoopTaskType type) {
    switch (type) {
    case SETUP:
        return TaskType.JOB_SETUP;
    case MAP:
    case COMBINE:
        return TaskType.MAP;

    case REDUCE:
        return TaskType.REDUCE;

    case COMMIT:
    case ABORT:
        return TaskType.JOB_CLEANUP;

    default:
        return null;
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Apache License

/**
 * @param type Task type.
 * @return Hadoop task type.
 */
private TaskType taskType(GridHadoopTaskType type) {
    switch (type) {
    case SETUP:
        return TaskType.JOB_SETUP;
    case MAP:
    case COMBINE:
        return TaskType.MAP;

    case REDUCE:
        return TaskType.REDUCE;

    case COMMIT:
    case ABORT:
        return TaskType.JOB_CLEANUP;

    default:
        return null;
    }
}

From source file: org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java

License: Apache License

private TaskAttemptID createAttemptID(int jobID, int taskID, int id) {
    return new TaskAttemptID("outputTest", jobID, TaskType.MAP, taskID, 1);
}

From source file: org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java

License: Apache License

/**
 * Tests output
 * 
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));

    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);

    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}

From source file: org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java

License: Apache License

@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    m_partfns = new ArrayList<String>();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "buffer-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.BUFFER });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new Class<?>[] {});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.BUFFER });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new Class<?>[] {});
}

From source file: org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java

License: Apache License

@BeforeClass
public void setUp() throws Exception {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    unsafe = Utils.getUnsafe();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.CHUNK });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new Class<?>[] {});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.CHUNK });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new Class<?>[] {});
}