List of usage examples for the org.apache.hadoop.mapreduce.JobID constructor
public JobID(String jtIdentifier, int id)
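Before the examples below, a minimal standalone sketch of this constructor, assuming only hadoop-mapreduce-client-core on the classpath; the "local" identifier and the id value 1 are arbitrary placeholders:

import org.apache.hadoop.mapreduce.JobID;

public class JobIDExample {
    public static void main(String[] args) {
        // jtIdentifier labels the cluster/jobtracker instance; id is the job number within it
        JobID jobId = new JobID("local", 1);
        // toString() yields the usual job_<identifier>_<id> form, e.g. job_local_0001
        System.out.println(jobId);
    }
}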
From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java
License:Apache License
public void shouldUseParquetFlagToSetCodec(String codecNameStr, CompressionCodecName expectedCodec)
        throws IOException {
    // Test mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
            new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(CodecConfig.from(task).getCodec(), expectedCodec);

    // Test mapred API
    JobConf jobConf = new JobConf();
    jobConf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}
From source file:org.apache.parquet.hadoop.codec.CodecConfigTest.java
License:Apache License
public void shouldUseHadoopFlagToSetCodec(String codecClassStr, CompressionCodecName expectedCodec)
        throws IOException {
    // Test mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", codecClassStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf,
            new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(expectedCodec, CodecConfig.from(task).getCodec());

    // Test mapred API
    JobConf jobConf = new JobConf();
    jobConf.setBoolean("mapred.output.compress", true);
    jobConf.set("mapred.output.compression.codec", codecClassStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}
From source file:org.apache.parquet.hadoop.thrift.TestParquetToThriftReadWriteAndProjection.java
License:Apache License
private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T exptectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadWriteAndProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    // create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));
    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(exptectedReadResult, readValue);
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
public static void write(String out) throws IOException, ParserException, InterruptedException, ExecException {
    StringBuilder schemaString = new StringBuilder("a0: chararray");
    for (int i = 1; i < COLUMN_COUNT; i++) {
        schemaString.append(", a" + i + ": chararray");
    }
    String location = out;
    String schema = schemaString.toString();
    StoreFuncInterface storer = new ParquetStorer();
    Job job = new Job(conf);
    storer.setStoreFuncUDFContextSignature("sig");
    String absPath = storer.relToAbsPathForStoreLocation(location,
            new Path(new File(".").getAbsoluteFile().toURI()));
    storer.setStoreLocation(absPath, job);
    storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
    @SuppressWarnings("unchecked") // that's how the base class is defined
    OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
    // it's ContextUtil.getConfiguration(job) and not just conf !
    JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
            new JobID("jt", jobid++));
    outputFormat.checkOutputSpecs(jobContext);
    if (schema != null) {
        ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
        storer.checkSchema(resourceSchema);
        if (storer instanceof StoreMetadata) {
            ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
        }
    }
    TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
            ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
    RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
    storer.prepareToWrite(recordWriter);
    for (int i = 0; i < ROW_COUNT; i++) {
        Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
        for (int j = 0; j < COLUMN_COUNT; j++) {
            tuple.set(j, "a" + i + "_" + j);
        }
        storer.putNext(tuple);
    }
    recordWriter.close(taskAttemptContext);
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
    outputCommitter.commitTask(taskAttemptContext);
    outputCommitter.commitJob(jobContext);
}
From source file:org.apache.parquet.pig.PerfTest2.java
License:Apache License
static void load(String out, int colsToLoad, StringBuilder results) throws Exception {
    StringBuilder schemaString = new StringBuilder("a0: chararray");
    for (int i = 1; i < colsToLoad; i++) {
        schemaString.append(", a" + i + ": chararray");
    }
    long t0 = System.currentTimeMillis();
    Job job = new Job(conf);
    int loadjobId = jobid++;
    LoadFunc loadFunc = new ParquetLoader(schemaString.toString());
    loadFunc.setUDFContextSignature("sigLoader" + loadjobId);
    String absPath = loadFunc.relativeToAbsolutePath(out, new Path(new File(".").getAbsoluteFile().toURI()));
    loadFunc.setLocation(absPath, job);
    @SuppressWarnings("unchecked") // that's how the base class is defined
    InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
    JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job),
            new JobID("jt", loadjobId));
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    int i = 0;
    int taskid = 0;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", loadjobId, true, taskid++, 0));
        RecordReader<Void, Tuple> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        loadFunc.prepareToRead(recordReader, null);
        recordReader.initialize(split, taskAttemptContext);
        Tuple t;
        while ((t = loadFunc.getNext()) != null) {
            if (Log.DEBUG)
                System.out.println(t);
            ++i;
        }
    }
    assertEquals(ROW_COUNT, i);
    long t1 = System.currentTimeMillis();
    results.append((t1 - t0) + " ms to read " + colsToLoad + " columns\n");
}
From source file:org.apache.tajo.storage.hbase.HBaseStorageManager.java
License:Apache License
@Override
public Path commitOutputData(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }

    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseStorageManager.getHBaseConfiguration(queryContext.getConf(),
            tableDesc.getMeta());
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    if (tableDesc.getName() == null && tableDesc.getPath() != null) {
        // insert into location
        return super.commitOutputData(queryContext, finalEbId, plan, schema, tableDesc, false);
    } else {
        // insert into table
        String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);
        HTable htable = new HTable(hbaseConf, tableName);
        try {
            LoadIncrementalHFiles loadIncrementalHFiles = null;
            try {
                loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
            } catch (Exception e) {
                LOG.error(e.getMessage(), e);
                throw new IOException(e.getMessage(), e);
            }
            loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);
            return stagingResultDir;
        } finally {
            htable.close();
        }
    }
}
From source file:org.apache.tajo.storage.hbase.HBaseTablespace.java
License:Apache License
@Override
public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }

    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseConfiguration.create(this.hbaseConf);
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    // insert into table
    String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);
    HTable htable = new HTable(hbaseConf, tableName);
    try {
        LoadIncrementalHFiles loadIncrementalHFiles = null;
        try {
            loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            throw new IOException(e.getMessage(), e);
        }
        loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);
        return stagingResultDir;
    } finally {
        htable.close();
    }
}
From source file:org.apache.tez.auxservices.ShuffleHandler.java
License:Apache License
@Override
public void initializeApplication(ApplicationInitializationContext context) {
    String user = context.getUser();
    ApplicationId appId = context.getApplicationId();
    ByteBuffer secret = context.getApplicationDataForService();
    // TODO these bytes should be versioned
    try {
        Token<JobTokenIdentifier> jt = deserializeServiceData(secret);
        // TODO: Once SHuffle is out of NM, this can use MR APIs
        JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
        recordJobShuffleInfo(jobId, user, jt);
    } catch (IOException e) {
        LOG.error("Error during initApp", e);
        // TODO add API to AuxiliaryServices to report failures
    }
}
From source file:org.apache.tez.auxservices.ShuffleHandler.java
License:Apache License
@Override
public void stopApplication(ApplicationTerminationContext context) {
    ApplicationId appId = context.getApplicationId();
    JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
    try {
        removeJobShuffleInfo(jobId);
    } catch (IOException e) {
        LOG.error("Error during stopApp", e);
        // TODO add API to AuxiliaryServices to report failures
    }
}
From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License:Apache License
private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf,
            inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(
                new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }
    Assert.assertEquals(locationHints, actual);
}