Usage examples for org.apache.hadoop.mapred.JobConf.setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
From source file:hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme.java
License:Apache License
/**
 * Prepares the job configuration for writing through this scheme.
 *
 * <p>Selects {@code DeprecatedParquetOutputFormat} as the output format, stores the schema
 * message produced by {@code HiveParquetSchemeHelper} for {@code hiveTableDescriptor} under
 * {@code DataWritableWriteSupport.PARQUET_HIVE_SCHEMA}, and registers
 * {@code DataWritableWriteSupport} as the Parquet write-support class.
 *
 * @param fp the current flow process; not read by this method
 * @param tap the sink tap; not read by this method
 * @param jobConf the job configuration, modified in place
 */
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);

    final String schemaMessage = HiveParquetSchemeHelper.getParquetSchemeMessage(hiveTableDescriptor);
    jobConf.set(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA, schemaMessage);

    ParquetOutputFormat.setWriteSupportClass(jobConf, DataWritableWriteSupport.class);
}
From source file:hydrograph.engine.cascading.scheme.parquet.ParquetTupleScheme.java
License:Apache License
/**
 * Prepares the job configuration for writing tuples as Parquet.
 *
 * <p>Selects {@code DeprecatedParquetOutputFormat} as the output format, stores
 * {@code parquetSchema} under {@code ParquetTupleWriter.PARQUET_CASCADING_SCHEMA}, and
 * registers {@code ParquetTupleWriter} as the Parquet write-support class.
 *
 * @param fp the current flow process; not read by this method
 * @param tap the sink tap; not read by this method
 * @param jobConf the job configuration, modified in place
 */
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
    // Output format first, then the schema the writer reads, then the writer itself.
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    jobConf.set(ParquetTupleWriter.PARQUET_CASCADING_SCHEMA, parquetSchema);
    ParquetOutputFormat.setWriteSupportClass(jobConf, ParquetTupleWriter.class);
}
From source file:IndexService.IndexMergeMR.java
License:Open Source License
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) { if (inputfiles == null || outputdir == null) return null; JobConf job = new JobConf(conf); job.setJobName("MergeIndexMR"); job.setJarByClass(IndexMergeMR.class); job.setNumReduceTasks(1);// w w w . ja va 2 s. c o m FileSystem fs = null; try { fs = FileSystem.get(job); fs.delete(new Path(outputdir), true); String[] ifs = inputfiles.split(","); TreeSet<String> files = new TreeSet<String>(); for (int i = 0; i < ifs.length; i++) { IFormatDataFile ifdf = new IFormatDataFile(job); ifdf.open(ifs[i]); Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values(); for (String str : strs) { files.add(str); } ifdf.close(); } StringBuffer sb = new StringBuffer(); for (String str : files) { sb.append(str + ","); } job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1)); IFormatDataFile ifdf = new IFormatDataFile(job); ifdf.open(ifs[0]); HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes(); ArrayList<String> fieldStrings = new ArrayList<String>(); for (int i = 0; i < map.size(); i++) { IRecord.IFType type = map.get(i); fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx()); } job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()])); job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456)); ifdf.close(); } catch (Exception e2) { e2.printStackTrace(); } FileInputFormat.setInputPaths(job, inputfiles); FileOutputFormat.setOutputPath(job, new Path(outputdir)); job.setOutputKeyClass(IndexKey.class); job.setOutputValueClass(IndexValue.class); job.setPartitionerClass(IndexMergePartitioner.class); job.setMapperClass(MergeIndexMap.class); job.setCombinerClass(MergeIndexReduce.class); job.setReducerClass(MergeIndexReduce.class); job.setInputFormat(IndexMergeIFormatInputFormat.class); job.setOutputFormat(IndexMergeIFormatOutputFormat.class); try { JobClient jc = new JobClient(job); 
return jc.submitJob(job); } catch (IOException e) { e.printStackTrace(); return null; } }
From source file:IndexService.IndexMR.java
License:Open Source License
/**
 * Configures and submits the "IndexMR" job.
 *
 * <p>The method deletes {@code outputdir} recursively, records {@code ids} under
 * {@code "index.ids"}, sets {@code "datafiletype"} to {@code "column"} or {@code "format"},
 * builds {@code ConstVar.HD_fieldMap} from the field types of the first input file plus two
 * trailing entries, and sums the record counts of the inputs to choose the number of reduce
 * tasks (one per started block of 100,000,000 records, never fewer than one).
 *
 * @param conf2 base configuration copied into the job
 * @param inputfiles comma-separated list of input files
 * @param column {@code true} to read the inputs with {@code IColumnInputFormat},
 *        {@code false} to read them with {@code IFormatInputFormat}
 * @param ids comma-separated field ids; each is parsed with {@code Integer.parseInt}
 * @param outputdir output directory; any existing content is deleted
 * @return the handle returned by {@code JobClient.submitJob}, or {@code null} when
 *         {@code inputfiles} or {@code outputdir} is {@code null} or submission throws an
 *         {@code IOException}
 */
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null) {
        return null;
    }

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    // One entry per requested id, plus two fixed trailing entries filled in below.
    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        try {
            IFormatDataFile ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            try {
                for (int i = 0; i < idxs.length; i++) {
                    int id = Integer.parseInt(idxs[i]);
                    byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                    fieldStrings[i] = type + ConstVar.RecordSplit + i;
                }
            } finally {
                // Close even when a lookup fails, so the handle is not leaked.
                ifdf.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            try {
                for (int i = 0; i < idxs.length; i++) {
                    int id = Integer.parseInt(idxs[i]);
                    byte type = icdf.fieldtypes().get(id).type();
                    fieldStrings[i] = type + ConstVar.RecordSplit + i;
                }
            } finally {
                icdf.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            try {
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(file);
                try {
                    wholerecnum += fff.segIndex().recnum();
                } finally {
                    fff.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        // De-duplicate the input names before publishing them and counting records.
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }

        StringBuilder sb = new StringBuilder();
        for (String str : files) {
            sb.append(str).append(",");
        }
        // Drop the trailing comma.
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                // Pick the first entry of the parent directory whose path contains the input
                // name. NOTE(review): fs is null here if FileSystem.get failed above; the
                // resulting exception is caught and reported below.
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                try {
                    wholerecnum += fff.segIndex().recnum();
                } finally {
                    fff.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    // Ceiling division by 100,000,000; yields 1 when no records were counted.
    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:infinidb.hadoop.db.InfiniDBOutputFormat.java
License:Apache License
/**
 * Initializes the reduce-part of the job with the appropriate output settings.
 *
 * <p>Sets {@code InfiniDBOutputFormat} as the job's output format, turns off speculative
 * execution for reduce tasks, and stores the schema name and table names in the job through
 * an {@code InfiniDBConfiguration} wrapper.
 *
 * @param job
 *          The job to configure; modified in place
 * @param schemaName
 *          The output schema name, passed to
 *          {@code InfiniDBConfiguration.setOutputSchemaName}
 * @param tableNames
 *          The output table names, passed to
 *          {@code InfiniDBConfiguration.setOutputTableNames}
 */
public static void setOutput(JobConf job, String schemaName, String... tableNames) {
    job.setOutputFormat(InfiniDBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
    dbConf.setOutputSchemaName(schemaName);
    dbConf.setOutputTableNames(tableNames);
}
From source file:infinidb.hadoop.db.InfiniDBOutputFormat.java
License:Apache License
/** * Initializes the reduce-part of the job with the appropriate output settings * /*from www. j a va 2s. co m*/ * @param job * The job * @param tableName * The table to insert data into * @param fieldNames * The field names in the table. If unknown, supply the appropriate * number of nulls. */ public static void setOutput(JobConf job, String schemaName) { job.setOutputFormat(InfiniDBOutputFormat.class); job.setReduceSpeculativeExecution(false); InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job); dbConf.setOutputSchemaName(schemaName); }
From source file:infinidb.hadoop.example.InfiniDBOutputDriver.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = new Configuration(); JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class); DBConfiguration.configureDB(jobconf, "com.mysql.jdbc.Driver", "jdbc:mysql://srvswint4/tpch1", "root", ""); String[] fields = { "n_nationkey", "n_name" }; String[] outFields = { "id", "name" }; jobconf.setInputFormat(IDBFileInputFormat.class); jobconf.setOutputFormat(InfiniDBOutputFormat.class); jobconf.setOutputKeyClass(NullWritable.class); jobconf.setOutputValueClass(Text.class); InfiniDBOutputFormat.setOutput(jobconf, "db", outFields); InfiniDBConfiguration idbconf = new InfiniDBConfiguration(jobconf); idbconf.setInputPath("input"); idbconf.setOutputPath("output"); idbconf.setInfiniDBHome("/usr/local/Calpont"); jobconf.setMapperClass(InfiniDoopMapper.class); jobconf.setNumMapTasks(1);//from w ww. ja v a 2 s . com jobconf.setNumReduceTasks(2); JobClient client = new JobClient(); client.setConf(jobconf); try { JobClient.runJob(jobconf); } catch (Exception e) { e.printStackTrace(); } return 0; }
From source file:invertedIndex.startJob.java
public static void start(String[] args) { try {//from www . j av a 2 s .c o m JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); lineIndexMapper Map = new lineIndexMapper(); conf.setMapperClass(Map.getClass()); lineIndexReducer Reduce = new lineIndexReducer(); conf.setCombinerClass(Reduce.getClass()); conf.setReducerClass(Reduce.getClass()); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[1])); Path outputDir = new Path(args[2]); outputDir.getFileSystem(conf).delete(outputDir, true); FileSystem fs = FileSystem.get(conf); fs.delete(outputDir, true); FileOutputFormat.setOutputPath(conf, outputDir); JobClient.runJob(conf); } catch (Exception Exp) { Exp.printStackTrace(); } }
From source file:io.bfscan.clueweb12.DumpWarcRecordsToPlainText.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Parses the input and output path options, runs a map-only job that reads with
 * {@code ClueWeb12InputFormat} and writes with {@code TextOutputFormat}, and logs the value
 * of the {@code Records.PAGES} counter.
 *
 * @param args command-line arguments containing the input and output path options
 * @return {@code -1} when the arguments cannot be parsed or a required option is missing,
 *         {@code 0} after the job has run
 * @throws Exception if running the job fails
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + DumpWarcRecordsToPlainText.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    JobConf conf = new JobConf(getConf(), DumpWarcRecordsToPlainText.class);
    conf.setJobName(DumpWarcRecordsToPlainText.class.getSimpleName() + ":" + input);

    // Map-only job: mapper output is written directly by the output format.
    conf.setNumReduceTasks(0);

    FileInputFormat.addInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    conf.setInputFormat(ClueWeb12InputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    RunningJob job = JobClient.runJob(conf);
    Counters counters = job.getCounters();

    // Keep the counter as a long; narrowing to int would misreport very large counts.
    long numDocs = counters.findCounter(Records.PAGES).getCounter();
    LOG.info("Read " + numDocs + " docs.");

    return 0;
}
From source file:io.fluo.stress.trie.Generate.java
License:Apache License
/**
 * Runs a map-only job that reads from {@code RandomLongInputFormat} and writes
 * {@code LongWritable}/{@code NullWritable} pairs as a sequence file.
 *
 * <p>Expects exactly four arguments: {@code <numMappers> <numbersPerMapper> <max>
 * <output dir>}. With any other argument count the usage is logged and the JVM exits with
 * status {@code -1}.
 *
 * @param args the four command-line arguments described above
 * @return {@code 0} if the job reports success, {@code -1} otherwise
 * @throws Exception if an argument is not a valid number, a precondition fails, or the job
 *         cannot be run
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        log.error("Usage: " + this.getClass().getSimpleName()
                + " <numMappers> <numbersPerMapper> <max> <output dir>");
        System.exit(-1);
    }

    final int numMappers = Integer.parseInt(args[0]);
    final int numPerMapper = Integer.parseInt(args[1]);
    final long max = Long.parseLong(args[2]);
    final Path outputDir = new Path(args[3]);

    Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
    Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
    Preconditions.checkArgument(max > 0, "max <= 0");

    final JobConf jobConf = new JobConf(getConf());

    jobConf.setJobName(this.getClass().getName());
    jobConf.setJarByClass(Generate.class);

    // Generator parameters read back by the input format and mappers.
    jobConf.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
    jobConf.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
    jobConf.setLong(TRIE_GEN_MAX_PROP, max);

    jobConf.setInputFormat(RandomLongInputFormat.class);

    // Map-only job.
    jobConf.setNumReduceTasks(0);

    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(NullWritable.class);

    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(jobConf, outputDir);

    final RunningJob submitted = JobClient.runJob(jobConf);
    submitted.waitForCompletion();

    if (submitted.isSuccessful()) {
        return 0;
    }
    return -1;
}