List of usage examples for org.apache.hadoop.mapred.JobConf.setNumReduceTasks
public void setNumReduceTasks(int n)
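setNumReduceTasks(int n) asks the framework to run n reduce tasks for the job (it sets the mapred.reduce.tasks property); passing 0 makes the job map-only, as several of the examples below do. Before the collected examples, here is a minimal standalone sketch of the call. The NumReduceTasksDemo class name, the choice of four reducers, and the use of the default identity mapper/reducer over KeyValueTextInputFormat are illustrative assumptions, not taken from any of the source files that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;

public class NumReduceTasksDemo { // hypothetical driver class for illustration
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(NumReduceTasksDemo.class);
        conf.setJobName("num-reduce-tasks-demo");

        // No mapper/reducer class is set, so the identity mapper and reducer
        // are used; the job simply re-partitions tab-separated key/value text.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Run four reduce tasks; 0 would make the job map-only.
        conf.setNumReduceTasks(4);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

Each reduce task writes one part-NNNNN file under the output directory, so the value passed to setNumReduceTasks also controls how many output files the job produces.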
From source file:org.apache.avro.mapred.TestWeather.java
License:Apache License
/** Uses default mapper with no reduces for a map-only identity job. */
@Test
@SuppressWarnings("deprecation")
public void testMapOnly() throws Exception {
    JobConf job = new JobConf();
    String inDir = System.getProperty("share.dir", "../../../share") + "/test/data";
    Path input = new Path(inDir + "/weather.avro");
    Path output = new Path(System.getProperty("test.dir", "target/test") + "/weather-ident");

    output.getFileSystem(job).delete(output);

    job.setJobName("identity map weather");

    AvroJob.setInputSchema(job, Weather.SCHEMA$);
    AvroJob.setOutputSchema(job, Weather.SCHEMA$);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    job.setNumReduceTasks(0); // map-only

    JobClient.runJob(job);

    // check output is correct
    DatumReader<Weather> reader = new SpecificDatumReader<Weather>();
    DataFileReader<Weather> check = new DataFileReader<Weather>(new File(inDir + "/weather.avro"), reader);
    DataFileReader<Weather> sorted = new DataFileReader<Weather>(
            new File(output.toString() + "/part-00000.avro"), reader);

    for (Weather w : sorted)
        assertEquals(check.next(), w);

    check.close();
    sorted.close();
}
From source file:org.apache.avro.tool.TetherTool.java
License:Apache License
@Override
public int run(InputStream ins, PrintStream outs, PrintStream err, List<String> args) throws Exception {
    OptionParser p = new OptionParser();
    OptionSpec<File> exec = p.accepts("program", "executable program, usually in HDFS").withRequiredArg()
            .ofType(File.class);
    OptionSpec<String> in = p.accepts("in", "comma-separated input paths").withRequiredArg()
            .ofType(String.class);
    OptionSpec<Path> out = p.accepts("out", "output directory").withRequiredArg().ofType(Path.class);
    OptionSpec<File> outSchema = p.accepts("outschema", "output schema file").withRequiredArg()
            .ofType(File.class);
    OptionSpec<File> mapOutSchema = p.accepts("outschemamap", "map output schema file, if different")
            .withOptionalArg().ofType(File.class);
    OptionSpec<Integer> reduces = p.accepts("reduces", "number of reduces").withOptionalArg()
            .ofType(Integer.class);

    JobConf job = new JobConf();

    try {
        OptionSet opts = p.parse(args.toArray(new String[0]));
        FileInputFormat.addInputPaths(job, in.value(opts));
        FileOutputFormat.setOutputPath(job, out.value(opts));
        TetherJob.setExecutable(job, exec.value(opts));
        job.set(AvroJob.OUTPUT_SCHEMA, Schema.parse(outSchema.value(opts)).toString());
        if (opts.hasArgument(mapOutSchema))
            job.set(AvroJob.MAP_OUTPUT_SCHEMA, Schema.parse(mapOutSchema.value(opts)).toString());
        if (opts.hasArgument(reduces))
            job.setNumReduceTasks(reduces.value(opts));
    } catch (Exception e) {
        p.printHelpOn(err);
        return -1;
    }

    TetherJob.runJob(job);
    return 0;
}
From source file:org.apache.blur.spark.Consumer.java
License:Apache License
private void run() {

    String checkpointDirectory = "hdfs://10.252.5.113:9000/user/hadoop/spark";

    // number of partitions for the Kafka topic
    int _partitionCount = 5;

    List<JavaDStream<MessageAndMetadata>> streamsList = new ArrayList<JavaDStream<MessageAndMetadata>>(
            _partitionCount);
    JavaDStream<MessageAndMetadata> unionStreams;

    SparkConf conf = new SparkConf().setAppName("KafkaReceiver").set("spark.streaming.blockInterval", "200");

    // Path to the Blur libraries; can be copied to each node of the Spark cluster.
    conf.set("spark.executor.extraClassPath", "/home/apache-blur-0.2.4/lib/*");

    // Use KryoSerializer for BlurMutate and Text.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(3000));

    /*
     * Receive the Kafka stream. Create an individual receiver for each
     * topic partition.
     */
    for (int i = 0; i < _partitionCount; i++) {
        streamsList.add(ssc.receiverStream(new KafkaReceiver(_props, i)));
    }

    /*
     * Union all the streams if there is more than one stream.
     */
    if (streamsList.size() > 1) {
        unionStreams = ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the single stream.
        unionStreams = streamsList.get(0);
    }

    /*
     * Generate the JavaPairDStream.
     */
    JavaPairDStream<Text, BlurMutate> pairDStream = unionStreams
            .mapToPair(new PairFunction<MessageAndMetadata, Text, BlurMutate>() {

                private static final long serialVersionUID = 443235214978L;

                public Tuple2<Text, BlurMutate> call(MessageAndMetadata mmeta) {

                    /*
                     * Create the BlurMutate from MessageAndMetadata.
                     */
                    String message = new String(mmeta.getPayload());
                    String keyStr = DigestUtils.shaHex(message);
                    Text key = new Text((keyStr).getBytes());
                    BlurMutate mutate = new BlurMutate(BlurMutate.MUTATE_TYPE.REPLACE, keyStr, keyStr,
                            "family");
                    mutate.addColumn("message", message);

                    return new Tuple2<Text, BlurMutate>(key, mutate);
                }
            });

    pairDStream.foreachRDD(new Function2<JavaPairRDD<Text, BlurMutate>, Time, Void>() {

        private static final long serialVersionUID = 88875777435L;

        @Override
        public Void call(JavaPairRDD<Text, BlurMutate> rdd, Time time) throws Exception {

            /*
             * Blur table details.
             */
            TableDescriptor tableDescriptor = new TableDescriptor();
            String tableUri = new Path("hdfs://10.252.5.113:9000/blur/tables/nrt").toString();
            tableDescriptor.tableUri = tableUri;
            tableDescriptor.cluster = "pearson";
            tableDescriptor.name = "nrt";
            tableDescriptor.shardCount = 9;
            Configuration conf = new Configuration();

            /*
             * Partition the RDD to match the Blur table shard count. A custom
             * partitioner channels each BlurMutate to the correct shard.
             */
            final JavaPairRDD<Text, BlurMutate> pRdd = rdd
                    .partitionBy(new BlurSparkPartitioner(tableDescriptor.shardCount))
                    .persist(StorageLevel.MEMORY_ONLY_2());

            /*
             * Blur-specific configuration.
             */
            BlurOutputFormat.setIndexLocally(conf, false);
            BlurOutputFormat.setOptimizeInFlight(conf, false);
            conf.setClass("mapreduce.reduce.class", DefaultBlurReducer.class, Reducer.class);
            conf.setClass("mapreduce.outputformat.class", BlurOutputFormat.class, OutputFormat.class);
            conf.setClass("mapreduce.partitioner.class", BlurPartitioner.class, Partitioner.class);
            conf.set("mapred.output.committer.class", BlurOutputCommitter.class.getName());
            conf.setInt("blur.output.max.document.buffer.size", 10000);

            BlurOutputFormat.setTableDescriptor(conf, tableDescriptor);

            JobConf jobConf = new JobConf(conf);
            jobConf.setNumReduceTasks(tableDescriptor.shardCount);
            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(BlurMutate.class);

            BlurMapReduceUtil.addAllJarsInBlurLib(conf);
            BlurMapReduceUtil.addDependencyJars(conf, org.apache.zookeeper.ZooKeeper.class,
                    org.apache.lucene.codecs.lucene42.Lucene42Codec.class, jobConf.getOutputKeyClass(),
                    jobConf.getOutputValueClass());

            /*
             * Write the RDD to the Blur table.
             */
            if (pRdd.count() > 0)
                pRdd.saveAsNewAPIHadoopFile(tableUri, Text.class, BlurMutate.class, BlurOutputFormat.class,
                        jobConf);

            return null;
        }
    });

    // ssc.checkpoint(checkpointDirectory);
    ssc.start();
    ssc.awaitTermination();
}
From source file:org.apache.cassandra.bulkloader.CassandraBulkLoader.java
License:Apache License
public static void runJob(String[] args) {
    JobConf conf = new JobConf(CassandraBulkLoader.class);

    if (args.length >= 4) {
        conf.setNumReduceTasks(new Integer(args[3]));
    }

    try {
        // We store the cassandra storage-conf.xml on the HDFS cluster
        DistributedCache.addCacheFile(new URI("/cassandra/storage-conf.xml#storage-conf.xml"), conf);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("CassandraBulkLoader_v2");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.crunch.io.avro.AvroKeyValueIT.java
License:Apache License
/**
 * Produces an Avro file using the org.apache.avro.mapred.* API.
 */
private Path produceMapRedOutputFile() throws IOException {
    JobConf conf = new JobConf(tempDir.getDefaultConfiguration(), AvroKeyValueIT.class);

    org.apache.avro.mapred.AvroJob.setOutputSchema(conf,
            Pair.getPairSchema(Person.SCHEMA$, Schema.create(Schema.Type.INT)));

    conf.setMapperClass(MapRedPersonMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);

    Path outputPath = new Path(tempDir.getFileName("mapreduce_output"));

    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(conf, tempDir.copyResourcePath("letters.txt"));
    org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, outputPath);

    RunningJob runningJob = JobClient.runJob(conf);
    runningJob.waitForCompletion();

    return outputPath;
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }

    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }

        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }

        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));

        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    // include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    // create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();
    Path interPath = new Path(methodTestDir, "inter");

    // create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    // manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    // verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    // test if load count is the same
    assertEquals(data.length, index);
    // test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}
From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java
License:Apache License
@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    // include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    // create table
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:ONE,spanish:UNO", "2,english:ONE,spanish:DOS",
            "3,english:ONE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();

    // create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseDirectOutputFormat.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);

    // manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    // verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    assertEquals(data.length, index);
}
From source file:org.apache.hcatalog.hbase.TestHBaseInputFormat.java
License:Apache License
@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {
    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    // configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
            job.set(entry.getKey(), entry.getValue());
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    assertFalse(MapReadProjHTable.error);
    assertEquals(MapReadProjHTable.count, 1);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}
From source file:org.apache.hcatalog.hbase.TestHCatHBaseInputFormat.java
License:Apache License
@Test
public void TestHBaseInputFormatProjectionReadMR() throws Exception {
    String tableName = newTableName("mytable");
    String tableQuery = "CREATE TABLE " + tableName
            + "(key string, testqualifier1 string, testqualifier2 string) STORED BY "
            + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,"
            + "testFamily:testQualifier1,testFamily:testQualifier2')";

    CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery);
    assertEquals(0, responseTwo.getResponseCode());

    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(tableName);
    assertTrue(doesTableExist);

    populateHBaseTable(tableName, 5);

    Configuration conf = new Configuration(hcatConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(getHiveConf().getAllProperties()));

    // output settings
    Path outputDir = new Path(getTestDir(), "mapred/testHBaseInputFormatProjectionReadMR");
    FileSystem fs = getFileSystem();
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // create job
    JobConf job = new JobConf(conf);
    job.setJobName("hbase-scan-column");
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapReadProjectionHTable.class);
    job.setInputFormat(HBaseInputFormat.class);

    // configure projection schema
    job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema()));
    Job newJob = new Job(job);
    HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
    InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString);
    job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString);
    for (PartInfo partinfo : info.getPartitions()) {
        for (Entry<String, String> entry : partinfo.getJobProperties().entrySet())
            job.set(entry.getKey(), entry.getValue());
    }
    assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS));

    job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
    org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());
    assertFalse(MapReadProjectionHTable.error);
    assertEquals(1, MapReadProjectionHTable.count);

    String dropTableQuery = "DROP TABLE " + tableName;
    CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery);
    assertEquals(0, responseThree.getResponseCode());

    boolean isHbaseTableThere = hAdmin.tableExists(tableName);
    assertFalse(isHbaseTableThere);
}