Example usage for org.apache.hadoop.mapreduce JobID JobID

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce JobID(String jtIdentifier, int id) constructor.

Prototype

public JobID(String jtIdentifier, int id) 

Document

Constructs a JobID object
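
The examples below all use a JobID as the root of Hadoop's id hierarchy: a TaskID wraps a JobID, and a TaskAttemptID wraps a TaskID, which is how the tests construct synthetic job and task attempt contexts. The following is a minimal, self-contained sketch of that pattern (the JobIDExample class is purely illustrative), assuming a Hadoop 2.x+ classpath where TaskID takes a TaskType; several examples on this page still use the older, deprecated boolean isMap constructors.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class JobIDExample {
    public static void main(String[] args) {
        // Construct a JobID from an identifier string and a numeric job id.
        JobID jobId = new JobID("local", 1);

        // toString() renders the canonical form, e.g. "job_local_0001".
        System.out.println(jobId);

        // Build the rest of the id hierarchy on top of the JobID,
        // as the test examples below do when creating task attempt contexts.
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
        System.out.println(attemptId);

        // A JobID can also be parsed back from its string form.
        JobID parsed = JobID.forName(jobId.toString());
        System.out.println(parsed.getJtIdentifier() + " / " + parsed.getId());
    }
}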

Usage

From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java

License:Apache License

public void shouldUseParquetFlagToSetCodec(String codecNameStr, CompressionCodecName expectedCodec)
        throws IOException {

    //Test mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
            new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(CodecConfig.from(task).getCodec(), expectedCodec);

    //Test mapred API
    JobConf jobConf = new JobConf();
    jobConf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}

From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java

License:Apache License

public void shouldUseHadoopFlagToSetCodec(String codecClassStr, CompressionCodecName expectedCodec)
        throws IOException {
    //Test mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", codecClassStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
            new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(expectedCodec, CodecConfig.from(task).getCodec());

    //Test mapred API
    JobConf jobConf = new JobConf();
    jobConf.setBoolean("mapred.output.compress", true);
    jobConf.set("mapred.output.compression.codec", codecClassStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}

From source file:org.apache.parquet.hadoop.thrift.TestParquetToThriftReadWriteAndProjection.java

License:Apache License

private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T expectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadWriteAndProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    //create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(expectedReadResult, readValue);

}

From source file:org.apache.parquet.pig.PerfTest2.java

License:Apache License

public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    {
        StringBuilder schemaString = new StringBuilder("a0: chararray");
        for (int i = 1; i < COLUMN_COUNT; i++) {
            schemaString.append(", a" + i + ": chararray");
        }

        String location = out;
        String schema = schemaString.toString();

        StoreFuncInterface storer = new ParquetStorer();
        Job job = new Job(conf);
        storer.setStoreFuncUDFContextSignature("sig");
        String absPath = storer.relToAbsPathForStoreLocation(location,
                new Path(new File(".").getAbsoluteFile().toURI()));
        storer.setStoreLocation(absPath, job);
        storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
        @SuppressWarnings("unchecked") // that's how the base class is defined
        OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
        // it's ContextUtil.getConfiguration(job) and not just conf !
        JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
                new JobID("jt", jobid++));
        outputFormat.checkOutputSpecs(jobContext);
        if (schema != null) {
            ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
            storer.checkSchema(resourceSchema);
            if (storer instanceof StoreMetadata) {
                ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
            }
        }
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
        RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        storer.prepareToWrite(recordWriter);

        for (int i = 0; i < ROW_COUNT; i++) {
            Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
            for (int j = 0; j < COLUMN_COUNT; j++) {
                tuple.set(j, "a" + i + "_" + j);
            }
            storer.putNext(tuple);
        }

        recordWriter.close(taskAttemptContext);
        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
        outputCommitter.commitTask(taskAttemptContext);
        outputCommitter.commitJob(jobContext);

    }
}

From source file:org.apache.parquet.pig.PerfTest2.java

License:Apache License

static void load(String out, int colsToLoad, StringBuilder results) throws Exception {
    StringBuilder schemaString = new StringBuilder("a0: chararray");
    for (int i = 1; i < colsToLoad; i++) {
        schemaString.append(", a" + i + ": chararray");
    }

    long t0 = System.currentTimeMillis();
    Job job = new Job(conf);
    int loadjobId = jobid++;
    LoadFunc loadFunc = new ParquetLoader(schemaString.toString());
    loadFunc.setUDFContextSignature("sigLoader" + loadjobId);
    String absPath = loadFunc.relativeToAbsolutePath(out, new Path(new File(".").getAbsoluteFile().toURI()));
    loadFunc.setLocation(absPath, job);
    @SuppressWarnings("unchecked") // that's how the base class is defined
    InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
    JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
            new JobID("jt", loadjobId));
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    int i = 0;
    int taskid = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", loadjobId, true, taskid++, 0));
        RecordReader<Void, Tuple> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        loadFunc.prepareToRead(recordReader, null);
        recordReader.initialize(split, taskAttemptContext);
        Tuple t;
        while ((t = loadFunc.getNext()) != null) {
            if (Log.DEBUG)
                System.out.println(t);
            ++i;
        }
    }
    assertEquals(ROW_COUNT, i);
    long t1 = System.currentTimeMillis();
    results.append((t1 - t0) + " ms to read " + colsToLoad + " columns\n");
}

From source file:org.apache.tajo.storage.hbase.HBaseStorageManager.java

License:Apache License

@Override
public Path commitOutputData(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }
    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseStorageManager.getHBaseConfiguration(queryContext.getConf(),
            tableDesc.getMeta());
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    if (tableDesc.getName() == null && tableDesc.getPath() != null) {

        // insert into location
        return super.commitOutputData(queryContext, finalEbId, plan, schema, tableDesc, false);
    } else {
        // insert into table
        String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);

        HTable htable = new HTable(hbaseConf, tableName);
        try {
            LoadIncrementalHFiles loadIncrementalHFiles = null;
            try {
                loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
            } catch (Exception e) {
                LOG.error(e.getMessage(), e);
                throw new IOException(e.getMessage(), e);
            }
            loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);

            return stagingResultDir;
        } finally {
            htable.close();
        }
    }
}

From source file:org.apache.tajo.storage.hbase.HBaseTablespace.java

License:Apache License

@Override
public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }

    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseConfiguration.create(this.hbaseConf);
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    // insert into table
    String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);

    HTable htable = new HTable(hbaseConf, tableName);
    try {
        LoadIncrementalHFiles loadIncrementalHFiles = null;
        try {
            loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            throw new IOException(e.getMessage(), e);
        }
        loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);

        return stagingResultDir;
    } finally {
        htable.close();
    }
}

From source file:org.apache.tez.auxservices.ShuffleHandler.java

License:Apache License

@Override
public void initializeApplication(ApplicationInitializationContext context) {

    String user = context.getUser();
    ApplicationId appId = context.getApplicationId();
    ByteBuffer secret = context.getApplicationDataForService();
    // TODO these bytes should be versioned
    try {
        Token<JobTokenIdentifier> jt = deserializeServiceData(secret);
        // TODO: Once SHuffle is out of NM, this can use MR APIs
        JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
        recordJobShuffleInfo(jobId, user, jt);
    } catch (IOException e) {
        LOG.error("Error during initApp", e);
        // TODO add API to AuxiliaryServices to report failures
    }
}

From source file:org.apache.tez.auxservices.ShuffleHandler.java

License:Apache License

@Override
public void stopApplication(ApplicationTerminationContext context) {
    ApplicationId appId = context.getApplicationId();
    JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
    try {
        removeJobShuffleInfo(jobId);
    } catch (IOException e) {
        LOG.error("Error during stopApp", e);
        // TODO add API to AuxiliaryServices to report failures
    }
}

From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java

License:Apache License

private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf,
            inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }

    Assert.assertEquals(locationHints, actual);
}