List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java
License:Apache License
@Override public int run(String[] args) throws Exception { HBaseConfiguration.addHbaseResources(getConf()); Configuration conf = getConf(); String quorum = conf.get("hbase.zookeeper.quorum"); String clientPort = conf.get("hbase.zookeeper.property.clientPort"); LOG.info("hbase.zookeeper.quorum=" + quorum); LOG.info("hbase.zookeeper.property.clientPort=" + clientPort); LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat")); CommandLine cmdLine = null;/*w ww .j a va2 s. c o m*/ try { cmdLine = parseOptions(args); LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort)); } catch (IllegalStateException e) { printHelpAndExit(e.getMessage(), getOptions()); } Class.forName(DriverManager.class.getName()); Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort)); String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt()); String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt()); String qualifiedTableName = getQualifiedTableName(schemaName, tableName); List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName); LOG.info("tableName=" + tableName); LOG.info("schemaName=" + schemaName); LOG.info("qualifiedTableName=" + qualifiedTableName); configureOptions(cmdLine, importColumns, getConf()); try { validateTable(conn, schemaName, tableName); } finally { conn.close(); } Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt())); Path outputPath = null; if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) { outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt())); } else { outputPath = new Path("/tmp/" + UUID.randomUUID()); } LOG.info("Configuring HFile output path to {}", outputPath); Job job = new Job(getConf(), "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY)); // Allow overriding the job jar setting by using a -D system property at startup if (job.getJar() == null) { 
job.setJarByClass(PhoenixCsvToKeyValueMapper.class); } job.setInputFormatClass(TextInputFormat.class); FileInputFormat.addInputPath(job, inputPath); FileSystem.get(getConf()); FileOutputFormat.setOutputPath(job, outputPath); job.setMapperClass(PhoenixCsvToKeyValueMapper.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(KeyValue.class); HTable htable = new HTable(getConf(), qualifiedTableName); // Auto configure partitioner and reducer according to the Main Data table HFileOutputFormat.configureIncrementalLoad(job, htable); LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath); boolean success = job.waitForCompletion(true); if (!success) { LOG.error("Import job failed, check JobTracker for details"); return 1; } LOG.info("Loading HFiles from {}", outputPath); LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf()); loader.doBulkLoad(outputPath, htable); htable.close(); LOG.info("Incremental load complete"); LOG.info("Removing output directory {}", outputPath); if (!FileSystem.get(getConf()).delete(outputPath, true)) { LOG.error("Removing output directory {} failed", outputPath); } return 0; }
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
/** * Sets up the actual job.//from ww w .ja va 2s. co m * * @param conf The current configuration. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath, String tmpOutputPath) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(CommonConstants.SEPARATOR); if (actualSeparator != null) { conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes())); } String tableNameConf = conf.get(CommonConstants.TABLE_NAME); if (tableNameConf == null) { conf.set(CommonConstants.TABLE_NAME, tableName); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; Path inputDir = new Path(inputPath); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(MutipleColumnImportTsv.class); FileInputFormat.setInputPaths(job, inputDir); //??Dimporttsv.inputFormatInputFormat,TextInputFormat String inputFmtName = conf.get(CommonConstants.INPUTFORMAT, "org.apache.hadoop.mapreduce.lib.input.TextInputFormat"); LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName); Class<? 
extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class); job.setInputFormatClass(inputFmtClass); job.setMapperClass(mapperClass); String hfileOutPath = tmpOutputPath; if (hfileOutPath != null) { if (!doesTableExist(tableName)) { createTable(conf, tableName); } HTable table = new HTable(conf, tableName); // job.setReducerClass(MutipleColumnReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* Guava used by TsvParser */); return job; }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/** * Sets up the actual job. importtsvmapreduce job * * @param conf The current configuration. * @return The newly created job./*from ww w . java 2 s .c om*/ * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath, String tmpOutputPath) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. //???BASE64? String actualSeparator = conf.get(CommonConstants.SEPARATOR); if (actualSeparator != null) { conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes())); } // See if a non-default Mapper was set?mapper?SingleColumnImporterMapper String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; Path inputDir = new Path(inputPath); //?job Job job = new Job(conf, NAME + "_" + tableName); //Set the Jar by finding where a given class came from. job.setJarByClass(SingleColumnImportTsv.class); // FileInputFormat.setInputPaths(job, inputDir); //jobinputformat //??Dimporttsv.inputFormatInputFormat,TextInputFormat //??Dimporttsv.inputFormatInputFormat,TextInputFormat String inputFmtName = conf.get(CommonConstants.INPUTFORMAT, "org.apache.hadoop.mapreduce.lib.input.TextInputFormat"); LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName); Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class); job.setInputFormatClass(inputFmtClass); job.setMapperClass(mapperClass); //mapper job.setMapperClass(mapperClass); String hfileOutPath = tmpOutputPath; if (hfileOutPath != null) { //? 
if (!doesTableExist(tableName)) { createTable(conf, tableName); } HTable table = new HTable(conf, tableName); //reducer job.setReducerClass(SingleColumnReducer.class); Path outputDir = new Path(hfileOutPath); // FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(TextArrayWritable.class); //job?partition?outputformat?reduce configureIncrementalLoad(job, table); } else {//put // No reducers. Just write straight to table. Call initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* Guava used by TsvParser */); return job; }
From source file:com.airline.analytics.AirlineDelayAnalytics.java
@Override public int run(String[] strings) throws Exception { Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics"); job.setJarByClass(AirlineDelayAnalytics.class); job.setMapperClass(AirlineMapper.class); // job.setCombinerClass(AirlineReducer.class); job.setReducerClass(AirlineReducer.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(strings[0])); FileOutputFormat.setOutputPath(job, new Path(strings[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.airline.analytics.AirlineUniqueRoutesAnalytics.java
@Override public int run(String[] strings) throws Exception { Job job = Job.getInstance(getConf(), "Hadoop Airline Orign Destination Analytics"); job.setJarByClass(getClass());//w ww . j av a 2 s . co m // Distributed Cache job.addCacheFile(new URI("/airline/codes.csv")); job.setMapperClass(AirlineMapper.class); // job.setCombinerClass(AirlineReducer.class); job.setReducerClass(AirlineReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(strings[0])); FileOutputFormat.setOutputPath(job, new Path(strings[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.alectenharmsel.research.hadoop.CodeTokenizer.java
License:Apache License
/**
 * Configures and runs the "SrcTok" job using this class's {@code Map} and
 * {@code Reduce} implementations.
 *
 * @param args exactly two arguments: the input path and the output path; with any
 *        other count a usage message is printed and the JVM exits with status -1
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if the job cannot be created, submitted or awaited
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        // The usage line previously named an unrelated tool ("MoabLicenses").
        System.err.println("Usage: CodeTokenizer <input> <output>");
        System.exit(-1);
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(CodeTokenizer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // The map output value type (LongWritable) differs from the final output value type (Text).
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.alectenharmsel.research.SrcTok.java
License:Apache License
/**
 * Configures and runs the "SrcTok" job with {@code SrcTokMapper} and
 * {@code SrcTokReducer}.
 *
 * @param args exactly two arguments: the input path and the output path; with any
 *        other count a usage message is printed and the JVM exits with status -1
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if the job cannot be created, submitted or awaited
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        // The usage line previously named an unrelated tool ("MoabLicenses").
        System.err.println("Usage: SrcTok <input> <output>");
        System.exit(-1);
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(SrcTok.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SrcTokMapper.class);
    job.setReducerClass(SrcTokReducer.class);

    // The map output value type (LongWritable) differs from the final output value type (Text).
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.alexholmes.hadooputils.combine.avro.mapreduce.CombineAvroKeyValueInputFormatTest.java
License:Apache License
/**
 * Runs an indexing job over an input read through {@code CombineAvroKeyValueInputFormat}
 * and checks that the Avro output file holds exactly the entries apple, banana and carrot,
 * in that order, with the expected document ids.
 */
@Test
public void testKeyValueInput() throws ClassNotFoundException, IOException, InterruptedException {
    // Create a test input file.
    File inputFile = createInputFile();

    // Configure the job input.
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(inputFile.getAbsolutePath()));
    job.setInputFormatClass(CombineAvroKeyValueInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.INT));
    AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));

    // Configure a mapper.
    job.setMapperClass(IndexMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Configure a reducer.
    job.setReducerClass(IndexReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(AvroValue.class);
    AvroJob.setOutputValueSchema(job, Schema.createArray(Schema.create(Schema.Type.INT)));

    // Configure the output format.
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    Path outputPath = new Path(mTempDir.getRoot().getPath(), "out-index");
    FileOutputFormat.setOutputPath(job, outputPath);

    // Run the job.
    assertTrue(job.waitForCompletion(true));

    // Verify that the output Avro container file has the expected data.
    File avroFile = new File(outputPath.toString(), "part-r-00000.avro");
    DatumReader<GenericRecord> datumReader = new SpecificDatumReader<GenericRecord>(
            AvroKeyValue.getSchema(Schema.create(Schema.Type.STRING),
                    Schema.createArray(Schema.create(Schema.Type.INT))));
    DataFileReader<GenericRecord> avroFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
    try {
        assertNextIndexEntry(avroFileReader, "apple", 1, 2, 3);
        assertNextIndexEntry(avroFileReader, "banana", 1, 2);
        assertNextIndexEntry(avroFileReader, "carrot", 1);
        assertFalse(avroFileReader.hasNext());
    } finally {
        // Release the file handle even when one of the assertions above fails.
        avroFileReader.close();
    }
}

/**
 * Asserts that the reader has a next record, that its key equals {@code expectedKey},
 * and that its value list has exactly as many elements as {@code expectedDocs} and
 * contains each of them.
 */
private static void assertNextIndexEntry(DataFileReader<GenericRecord> reader, String expectedKey,
        Integer... expectedDocs) {
    assertTrue(reader.hasNext());
    AvroKeyValue<CharSequence, List<Integer>> record =
            new AvroKeyValue<CharSequence, List<Integer>>(reader.next());
    assertNotNull(record.get());
    assertEquals(expectedKey, record.getKey().toString());
    List<Integer> docs = record.getValue();
    assertEquals(expectedDocs.length, docs.size());
    for (Integer expectedDoc : expectedDocs) {
        assertTrue(docs.contains(expectedDoc));
    }
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileJob.java
License:Apache License
/** * The driver for the MapReduce job./* w w w .ja v a 2 s . c om*/ * * @param conf configuration * @param inputDirAsString input directory in CSV-form * @param outputDirAsString output directory * @return true if the job completed successfully * @throws java.io.IOException if something went wrong * @throws java.net.URISyntaxException if a URI wasn't correctly formed */ public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { Job job = new Job(conf); job.setJarByClass(CombineSequenceFileJob.class); job.setJobName("seqfilecombiner"); job.setNumReduceTasks(0); // job.setMapperClass(IdentityMapper.class); job.setInputFormatClass(CombineSequenceFileInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, inputDirAsString); FileOutputFormat.setOutputPath(job, new Path(outputDirAsString)); Date startTime = new Date(); System.out.println("Job started: " + startTime); boolean jobResult = job.waitForCompletion(true); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds."); return jobResult; }
From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java
License:Apache License
public static void main(String[] args) throws Exception { if (!parseArgs(args)) { printUsage();//from w w w . ja v a2s. c om System.exit(1); } if (endpoint == null || accessKeyId == null || accessKeySecret == null || inputTable == null || outputTable == null) { printUsage(); System.exit(1); } Configuration conf = new Configuration(); Job job = Job.getInstance(conf, TableStoreOutputFormatExample.class.getName()); job.setMapperClass(OwnerMapper.class); job.setReducerClass(IntoTableReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(MapWritable.class); job.setInputFormatClass(TableStoreInputFormat.class); job.setOutputFormatClass(TableStoreOutputFormat.class); TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken); TableStore.setEndpoint(job, endpoint, instance); TableStoreInputFormat.addCriteria(job, fetchCriteria()); TableStoreOutputFormat.setOutputTable(job, outputTable); System.exit(job.waitForCompletion(true) ? 0 : 1); }