Example usage for org.apache.hadoop.mapreduce TaskType MAP

List of usage examples for org.apache.hadoop.mapreduce TaskType MAP

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce TaskType MAP.

Prototype

TaskType MAP
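
Before the full examples below, here is a minimal, self-contained sketch (not taken from any of the listed source files) of the most common pattern: TaskType.MAP is passed to a TaskAttemptID constructor, and the resulting id is used to build a TaskAttemptContext for driving record writers and readers in tests. The class name TaskTypeMapSketch is illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskTypeMapSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Arguments: job tracker identifier, job id, task type, task id, attempt id.
        TaskAttemptID taskAttemptId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
        // The context wraps the configuration and attempt id; input/output formats
        // use it to resolve per-attempt paths and settings.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskAttemptId);
        System.out.println(context.getTaskAttemptID());
    }
}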


Usage

From source file:org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java

License:Apache License

@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "long-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.LONG });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new Class<?>[] {});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            1024L * 1024 * 1024 * 2);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.LONG });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new Class<?>[] {});
}

From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java

License:Apache License

@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "person-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.DURABLE });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
            new Class<?>[] { PersonListEFProxy.class });
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new DurableType[] { DurableType.DURABLE });
    MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
            new Class<?>[] { PersonListEFProxy.class });
}

From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License:Apache License

@Test
public void testPredicatePushdown() throws Exception {
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    final String typeStr = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 4000 rows with the integer and the binary string
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 4000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new Text(Integer.toBinaryString(r)));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    OrcInputFormat.setSearchArgument(conf,
            SearchArgumentFactory.newBuilder()
                    .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999)).build(),
            new String[] { null, "i", "s" });
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split,
            attemptContext);
    // the sarg should cause the reader to skip everything except rows 1000 to 1999,
    // since the row index stride is 1000 and the matching values fall in that row group
    for (int r = 1000; r < 2000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString());
    }
    assertEquals(false, reader.nextKeyValue());
}

From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License:Apache License

@Test
public void testColumnSelection() throws Exception {
    String typeStr = "struct<i:int,j:int,k:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 3000 rows with three integer columns
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new IntWritable(r * 2));
        row.setFieldValue(2, new IntWritable(r * 3));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2");
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split,
            attemptContext);
    // only columns 0 and 2 were selected, so the middle column (j) should be read back as null
    for (int r = 0; r < 3000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(null, row.getFieldValue(1));
        assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get());
    }
    assertEquals(false, reader.nextKeyValue());
}

From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License:Apache License

/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License:Apache License

/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);

    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.java

License:Apache License

private void getStats(Job job, boolean errNotDbg, PigContext pigContext) throws ExecException {
    JobID MRJobID = job.getAssignedJobID();
    String jobMessage = job.getMessage();
    Exception backendException = null;
    if (MRJobID == null) {
        try {
            LogUtils.writeLog("Backend error message during job submission", jobMessage,
                    pigContext.getProperties().getProperty("pig.logfile"), log);
            backendException = getExceptionFromString(jobMessage);
        } catch (Exception e) {
            int errCode = 2997;
            String msg = "Unable to recreate exception from backend error: " + jobMessage;
            throw new ExecException(msg, errCode, PigException.BUG);
        }
        throw new ExecException(backendException);
    }
    try {
        TaskReport[] mapRep = HadoopShims.getTaskReports(job, TaskType.MAP);
        if (mapRep != null) {
            getErrorMessages(mapRep, "map", errNotDbg, pigContext);
            totalHadoopTimeSpent += computeTimeSpent(mapRep);
            mapRep = null;
        }
        TaskReport[] redRep = HadoopShims.getTaskReports(job, TaskType.REDUCE);
        if (redRep != null) {
            getErrorMessages(redRep, "reduce", errNotDbg, pigContext);
            totalHadoopTimeSpent += computeTimeSpent(redRep);
            redRep = null;
        }
    } catch (IOException e) {
        if (job.getState() == Job.SUCCESS) {
            // if the job succeeded, let the user know that
            // we were unable to get statistics
            log.warn("Unable to get job related diagnostics");
        } else {
            throw new ExecException(e);
        }
    } catch (Exception e) {
        throw new ExecException(e);
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java

License:Apache License

static public boolean isMap(TaskAttemptID taskAttemptID) {
    return taskAttemptID.getTaskType() == TaskType.MAP;
}

From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java

License:Apache License

static public TaskAttemptID getNewTaskAttemptID() {
    TaskAttemptID taskAttemptID = new TaskAttemptID("", 1, TaskType.MAP, 1, 1);
    return taskAttemptID;
}

From source file:org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java

License:Apache License

static public TaskAttemptID createTaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId,
        int id) {
    if (isMap) {
        return new TaskAttemptID(jtIdentifier, jobId, TaskType.MAP, taskId, id);
    } else {
        return new TaskAttemptID(jtIdentifier, jobId, TaskType.REDUCE, taskId, id);
    }
}