List of usage examples for org.apache.hadoop.mapred.JobConf.setNumReduceTasks
public void setNumReduceTasks(int n)
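setNumReduceTasks(int n) asks the framework to run n reduce tasks for the job (it sets the mapred.reduce.tasks property); passing 0 makes the job map-only, as several of the examples below do. Before the collected examples, here is a minimal standalone sketch of the call. The NumReduceTasksDemo class name, the choice of four reducers, and the use of the default identity mapper/reducer over KeyValueTextInputFormat are illustrative assumptions, not taken from any of the source files that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;

public class NumReduceTasksDemo { // hypothetical driver class for illustration
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(NumReduceTasksDemo.class);
        conf.setJobName("num-reduce-tasks-demo");

        // No mapper/reducer class is set, so the identity mapper and reducer
        // are used; the job simply re-partitions tab-separated key/value text.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Run four reduce tasks; 0 would make the job map-only.
        conf.setNumReduceTasks(4);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

Each reduce task writes one part-NNNNN file under the output directory, so the value passed to setNumReduceTasks also controls how many output files the job produces.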
From source file:org.apache.avro.mapred.TestWeather.java
License:Apache License
/** Uses default mapper with no reduces for a map-only identity job. */
@Test
@SuppressWarnings("deprecation")
public void testMapOnly() throws Exception {
    JobConf job = new JobConf();
    String inDir = System.getProperty("share.dir", "../../../share") + "/test/data";
    Path input = new Path(inDir + "/weather.avro");
    Path output = new Path(System.getProperty("test.dir", "target/test") + "/weather-ident");

    output.getFileSystem(job).delete(output);

    job.setJobName("identity map weather");

    AvroJob.setInputSchema(job, Weather.SCHEMA$);
    AvroJob.setOutputSchema(job, Weather.SCHEMA$);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    job.setNumReduceTasks(0); // map-only

    JobClient.runJob(job);

    // check output is correct
    DatumReader<Weather> reader = new SpecificDatumReader<Weather>();
    DataFileReader<Weather> check = new DataFileReader<Weather>(new File(inDir + "/weather.avro"), reader);
    DataFileReader<Weather> sorted = new DataFileReader<Weather>(
            new File(output.toString() + "/part-00000.avro"), reader);

    for (Weather w : sorted)
        assertEquals(check.next(), w);

    check.close();
    sorted.close();
}
From source file:org.apache.avro.tool.TetherTool.java
License:Apache License
@Override
public int run(InputStream ins, PrintStream outs, PrintStream err, List<String> args) throws Exception {
    OptionParser p = new OptionParser();
    OptionSpec<File> exec = p.accepts("program", "executable program, usually in HDFS").withRequiredArg()
            .ofType(File.class);
    OptionSpec<String> in = p.accepts("in", "comma-separated input paths").withRequiredArg()
            .ofType(String.class);
    OptionSpec<Path> out = p.accepts("out", "output directory").withRequiredArg().ofType(Path.class);
    OptionSpec<File> outSchema = p.accepts("outschema", "output schema file").withRequiredArg()
            .ofType(File.class);
    OptionSpec<File> mapOutSchema = p.accepts("outschemamap", "map output schema file, if different")
            .withOptionalArg().ofType(File.class);
    OptionSpec<Integer> reduces = p.accepts("reduces", "number of reduces").withOptionalArg()
            .ofType(Integer.class);

    JobConf job = new JobConf();

    try {
        OptionSet opts = p.parse(args.toArray(new String[0]));
        FileInputFormat.addInputPaths(job, in.value(opts));
        FileOutputFormat.setOutputPath(job, out.value(opts));
        TetherJob.setExecutable(job, exec.value(opts));
        job.set(AvroJob.OUTPUT_SCHEMA, Schema.parse(outSchema.value(opts)).toString());
        if (opts.hasArgument(mapOutSchema))
            job.set(AvroJob.MAP_OUTPUT_SCHEMA, Schema.parse(mapOutSchema.value(opts)).toString());
        if (opts.hasArgument(reduces))
            job.setNumReduceTasks(reduces.value(opts));
    } catch (Exception e) {
        p.printHelpOn(err);
        return -1;
    }

    TetherJob.runJob(job);
    return 0;
}
From source file:org.apache.blur.spark.Consumer.java
License:Apache License
private void run() {

    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partitions for the Kafka topic
    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");

    // Path to the Blur libraries; can be copied to each node of the Spark cluster.
    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");

    // Use KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive the Kafka stream. Create an individual receiver for each
     * topic partition.
     */
    for (int i = 0; i < _partitionCount; i++) {
        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));
    }

    /*
     * Union all the streams if there is more than one stream.
     */
    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the single stream.
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate the JavaPairDStream.
     */
    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {

                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {

                    /*
                     * Create the BlurMutate from MessageAndMetadata.
                     */
                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr,
                            "family");
                    mutate.addColumn("message", message);

                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {

        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {

            /*
             * Blur table details.
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition the RDD to match the Blur table shard count. A custom
             * partitioner channels each BlurMutate to the correct shard.
             */
            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur-specific configuration.
             */
            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);

            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);
            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to the Blur table.
             */
            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}
From source file:org.apache.cassandra.bulkloader.CassandraBulkLoader.java
License:Apache License
public static void runJob(String[] args) {
    JobConf conf = new JobConf(CassandraBulkLoader.class);

    if (args.length >= 4) {
        conf.setNumReduceTasks(new Integer(args[3]));
    }

    try {
        // We store the cassandra storage-conf.xml on the HDFS cluster
        DistributedCache.addCacheFile(new URI("/cassandra/storage-conf.xml#storage-conf.xml"), conf);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("CassandraBulkLoader_v2");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.crunch.io.avro.AvroKeyValueIT.java
License:Apache License
/**
 * Produces an Avro file using the org.apache.avro.mapred.* API.
 */
private Path produceMapRedOutputFile() throws IOException {
    JobConf conf = new JobConf(tempDir.getDefaultConfiguration(), AvroKeyValueIT.class);

    org.apache.avro.mapred.AvroJob.setOutputSchema(conf,
            Pair.getPairSchema(Person.SCHEMA$, Schema.create(Schema.Type.INT)));

    conf.setMapperClass(MapRedPersonMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);

    Path outputPath = new Path(tempDir.getFileName("mapreduce_output"));

    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(conf, tempDir.copyResourcePath("letters.txt"));
    org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, outputPath);

    RunningJob runningJob = JobClient.runJob(conf);
    runningJob.waitForCompletion();

    return outputPath;
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }

    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }

        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }

        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));

        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    // include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    // create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();
    Path interPath = new Path(methodTestDir, "inter");

    // create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    // manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    // verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    // test if load count is the same
    assertEquals(data.length, index);
    // test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}
From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java
License:Apache License
@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    // include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    // create table
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:ONE,spanish:UNO", "2,english:ONE,spanish:DOS",
            "3,english:ONE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();

    // create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseDirectOutputFormat.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);

    // manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    // verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    assertEquals(data.length, index);
}
From source file:org.apache.hcatalog.hbase.TestHBaseInputFormat.java
License:Apache License
@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {
    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    // configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
            job.set(entry.getKey(), entry.getValue());
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    assertFalse(MapReadProjHTable.error);
    assertEquals(MapReadProjHTable.count, 1);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}
From source file:org.apache.hcatalog.hbase.TestHCatHBaseInputFormat.java
License:Apache License
@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {
    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseInputFormatProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    // configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
            job.set(entry.getKey(), entry.getValue());
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    assertFalse(MapReadProjectionHTable.error);
    assertEquals(1, MapReadProjectionHTable.count);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}