List of usage examples for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs#addNamedOutput
@SuppressWarnings("unchecked") public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass, Class<?> keyClass, Class<?> valueClass)
From source file:org.apache.kylin.engine.mr.steps.FactDistinctColumnsJob.java
License:Apache License
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // make each reducer output to its respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryJob.java
License:Apache License
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
From source file:org.apache.kylin.engine.spark.SparkFactDistinct.java
License:Apache License
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
    String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
    int samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_STATS_SAMPLING_PERCENT));

    Class[] kryoClassArray = new Class[] { Class.forName("scala.reflect.ClassTag$$anon$1"),
            Class.forName("org.apache.kylin.engine.mr.steps.SelfDefineSortableKey") };

    SparkConf conf = new SparkConf()
            .setAppName("Fact distinct columns for:" + cubeName + " segment " + segmentId);
    // serialization conf
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
    conf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClassArray);

    KylinSparkJobListener jobListener = new KylinSparkJobListener();
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        sc.sc().addSparkListener(jobListener);
        HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));

        final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
        KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
        final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);
        final Job job = Job.getInstance(sConf.get());
        final FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(
                cubeInstance);

        logger.info("RDD Output path: {}", outputPath);
        logger.info("getTotalReducerNum: {}", reducerMapping.getTotalReducerNum());
        logger.info("getCuboidRowCounterReducerNum: {}", reducerMapping.getCuboidRowCounterReducerNum());
        logger.info("counter path {}", counterPath);

        boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE
                .equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

        // calculate source record bytes size
        final LongAccumulator bytesWritten = sc.sc().longAccumulator();

        final JavaRDD<String[]> recordRDD = SparkUtil.hiveRecordInputRDD(isSequenceFile, sc, inputPath,
                hiveTable);
        JavaPairRDD<SelfDefineSortableKey, Text> flatOutputRDD = recordRDD.mapPartitionsToPair(
                new FlatOutputFucntion(cubeName, segmentId, metaUrl, sConf, samplingPercent, bytesWritten));
        JavaPairRDD<SelfDefineSortableKey, Iterable<Text>> aggredRDD = flatOutputRDD.groupByKey(
                new FactDistinctPartitioner(cubeName, metaUrl, sConf, reducerMapping.getTotalReducerNum()));
        JavaPairRDD<String, Tuple3<Writable, Writable, String>> outputRDD = aggredRDD
                .mapPartitionsToPair(new MultiOutputFunction(cubeName, metaUrl, sConf, samplingPercent));

        // make each reducer output to its respective dir
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
                NullWritable.class, Text.class);
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
                NullWritable.class, ArrayPrimitiveWritable.class);
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
                LongWritable.class, BytesWritable.class);
        MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
                NullWritable.class, LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        FileOutputFormat.setCompressOutput(job, false);

        // prevent creation of a zero-sized default output
        LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

        MultipleOutputsRDD multipleOutputsRDD = MultipleOutputsRDD.rddToMultipleOutputsRDD(outputRDD);
        multipleOutputsRDD.saveAsNewAPIHadoopDatasetWithMultipleOutputs(job.getConfiguration());

        long recordCount = recordRDD.count();
        logger.info("Map input records={}", recordCount);
        logger.info("HDFS Read: {} HDFS Write", bytesWritten.value());

        Map<String, String> counterMap = Maps.newHashMap();
        counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
        counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten.value()));

        // save counter to hdfs
        HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counterMap);

        HadoopUtil.deleteHDFSMeta(metaUrl);
    }
}
From source file:org.apache.mahout.utils.SplitInputJob.java
License:Apache License
/**
 * Run job to downsample, randomly permute and split data into test and
 * training sets. This job takes a SequenceFile as input and outputs two
 * SequenceFiles, test-r-00000 and training-r-00000, which contain the test
 * and training sets respectively.
 *
 * @param initialConf
 * @param inputPath
 *          path to input data SequenceFile
 * @param outputPath
 *          path for output data SequenceFiles
 * @param keepPct
 *          percentage of key value pairs in input to keep; the rest are
 *          discarded
 * @param randomSelectionPercent
 *          percentage of key value pairs to allocate to the test set; the
 *          remainder are allocated to the training set
 */
@SuppressWarnings("rawtypes")
public static void run(Configuration initialConf, Path inputPath, Path outputPath, int keepPct,
        float randomSelectionPercent) throws IOException, ClassNotFoundException, InterruptedException {
    int downsamplingFactor = (int) (100.0 / keepPct);
    initialConf.setInt(DOWNSAMPLING_FACTOR, downsamplingFactor);
    initialConf.setFloat(RANDOM_SELECTION_PCT, randomSelectionPercent);

    // Determine class of keys and values
    FileSystem fs = FileSystem.get(initialConf);
    SequenceFileDirIterator<? extends WritableComparable, Writable> iterator =
            new SequenceFileDirIterator<WritableComparable, Writable>(inputPath, PathType.LIST,
                    PathFilters.partFilter(), null, false, fs.getConf());
    Class<? extends WritableComparable> keyClass;
    Class<? extends Writable> valueClass;
    if (iterator.hasNext()) {
        Pair<? extends WritableComparable, Writable> pair = iterator.next();
        keyClass = pair.getFirst().getClass();
        valueClass = pair.getSecond().getClass();
    } else {
        throw new IllegalStateException("Couldn't determine class of the input values");
    }

    Job job = new Job(new Configuration(initialConf));
    MultipleOutputs.addNamedOutput(job, TRAINING_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    MultipleOutputs.addNamedOutput(job, TEST_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    job.setJarByClass(SplitInputJob.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(SplitInputMapper.class);
    job.setReducerClass(SplitInputReducer.class);
    job.setSortComparatorClass(SplitInputComparator.class);
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean computeExpTable() throws IOException, ClassNotFoundException, InterruptedException {
    boolean success;

    logger.info("Creating expTable");

    // The split location for the interim calculations; delete upon completion
    Path splitDir = new Path("/tmp/splits-" + queryInfo.getIdentifier());
    if (fs.exists(splitDir)) {
        fs.delete(splitDir, true);
    }

    // Write the query hashes to the split files
    Map<Integer, BigInteger> queryElements = query.getQueryElements();
    List<Integer> keys = new ArrayList<>(queryElements.keySet());

    int numSplits = SystemConfiguration.getIntProperty("pir.expCreationSplits", 100);
    int elementsPerSplit = queryElements.size() / numSplits; // Integral division.
    logger.info("numSplits = " + numSplits + " elementsPerSplit = " + elementsPerSplit);

    for (int i = 0; i < numSplits; ++i) {
        // Grab the range of the thread
        int start = i * elementsPerSplit;
        int stop = start + elementsPerSplit - 1;
        if (i == (numSplits - 1)) {
            stop = queryElements.size() - 1;
        }
        HDFS.writeFileIntegers(keys.subList(start, stop), fs, new Path(splitDir, "split-" + i), false);
    }

    // Run the job to generate the expTable
    // Job jobExp = new Job(mrConfig.getConfig(), "pirExp-" + pirWL.getWatchlistNum());
    Job jobExp = Job.getInstance(conf, "pirExp-" + queryInfo.getIdentifier());

    jobExp.setSpeculativeExecution(false);
    jobExp.getConfiguration().set("mapreduce.map.speculative", "false");
    jobExp.getConfiguration().set("mapreduce.reduce.speculative", "false");

    // Set the memory and heap options
    jobExp.getConfiguration().set("mapreduce.map.memory.mb",
            SystemConfiguration.getProperty("mapreduce.map.memory.mb", "10000"));
    jobExp.getConfiguration().set("mapreduce.reduce.memory.mb",
            SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "10000"));
    jobExp.getConfiguration().set("mapreduce.map.java.opts",
            SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx9000m"));
    jobExp.getConfiguration().set("mapreduce.reduce.java.opts",
            SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx9000m"));
    jobExp.getConfiguration().set("mapreduce.reduce.shuffle.parallelcopies", "5");

    jobExp.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput"));
    jobExp.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);

    jobExp.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(jobExp, splitDir);

    jobExp.setJarByClass(ExpTableMapper.class);
    jobExp.setMapperClass(ExpTableMapper.class);

    jobExp.setMapOutputKeyClass(Text.class);
    jobExp.setMapOutputValueClass(Text.class);

    // Set the reducer and output params
    int numExpLookupPartitions = SystemConfiguration.getIntProperty("pir.numExpLookupPartitions", 100);
    jobExp.setNumReduceTasks(numExpLookupPartitions);
    jobExp.setReducerClass(ExpTableReducer.class);

    // Delete the output directory if it exists
    Path outPathExp = new Path(outputDirExp);
    if (fs.exists(outPathExp)) {
        fs.delete(outPathExp, true);
    }

    jobExp.setOutputKeyClass(Text.class);
    jobExp.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobExp, outPathExp);
    jobExp.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

    MultipleOutputs.addNamedOutput(jobExp, FileConst.PIR, TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addNamedOutput(jobExp, FileConst.EXP, TextOutputFormat.class, Text.class, Text.class);

    // Submit job, wait for completion
    success = jobExp.waitForCompletion(true);

    // Assemble the exp table from the output
    // element_index -> fileName
    Map<Integer, String> expFileTable = new HashMap<>();
    FileStatus[] status = fs.listStatus(outPathExp);
    for (FileStatus fstat : status) {
        if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
            logger.info("fstat.getPath().getName().toString() = " + fstat.getPath().getName());
            try {
                try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fstat.getPath())))) {
                    String line;
                    while ((line = br.readLine()) != null) {
                        String[] rowValTokens = line.split(","); // form is element_index,reducerNumber
                        String fileName = fstat.getPath().getParent() + "/" + FileConst.EXP + "-r-"
                                + rowValTokens[1];
                        logger.info("fileName = " + fileName);
                        expFileTable.put(Integer.parseInt(rowValTokens[0]), fileName);
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    // Place exp table in query object
    query.setExpFileBasedLookup(expFileTable);
    new HadoopFileSystemStore(fs).store(queryInputDir, query);

    logger.info("Completed creation of expTable");

    return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
@SuppressWarnings("unchecked") private boolean readDataEncRows(Path outPathInit) throws Exception { boolean success; Job job = Job.getInstance(conf, "pirMR"); job.setSpeculativeExecution(false);//from w w w . j ava2s. c om // Set the data and query schema properties job.getConfiguration().set("dataSchemaName", qSchema.getDataSchemaName()); job.getConfiguration().set("data.schemas", SystemConfiguration.getProperty("data.schemas")); job.getConfiguration().set("query.schemas", SystemConfiguration.getProperty("query.schemas")); // Set the memory and heap options job.getConfiguration().set("mapreduce.map.memory.mb", SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000")); job.getConfiguration().set("mapreduce.reduce.memory.mb", SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000")); job.getConfiguration().set("mapreduce.map.java.opts", SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m")); job.getConfiguration().set("mapreduce.reduce.java.opts", SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m")); // Set necessary files for Mapper setup job.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput")); job.getConfiguration().set("pirMR.stopListFile", SystemConfiguration.getProperty("pir.stopListFile")); job.getConfiguration().set("mapreduce.map.speculative", "false"); job.getConfiguration().set("mapreduce.reduce.speculative", "false"); job.getConfiguration().set("pirWL.useLocalCache", SystemConfiguration.getProperty("pir.useLocalCache", "true")); job.getConfiguration().set("pirWL.limitHitsPerSelector", SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false")); job.getConfiguration().set("pirWL.maxHitsPerSelector", SystemConfiguration.getProperty("pir.maxHitsPerSelector", "100")); if (dataInputFormat.equals(InputFormatConst.ES)) { String jobName = "pirMR_es_" + esResource + "_" + esQuery + "_" + System.currentTimeMillis(); job.setJobName(jobName); job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes")); job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port")); job.getConfiguration().set("es.resource", esResource); job.getConfiguration().set("es.query", esQuery); job.setInputFormatClass(EsInputFormat.class); } else if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) { String baseQuery = SystemConfiguration.getProperty("pir.baseQuery"); String jobName = "pirMR_base_" + baseQuery + "_" + System.currentTimeMillis(); job.setJobName(jobName); job.getConfiguration().set("baseQuery", baseQuery); job.getConfiguration().set("query", baseQuery); job.getConfiguration().set("pir.allowAdHocQuerySchemas", SystemConfiguration.getProperty("pir.allowAdHocQuerySchemas", "false")); job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true); // Set the inputFormatClass based upon the baseInputFormat property String classString = SystemConfiguration.getProperty("pir.baseInputFormat"); Class<BaseInputFormat> inputClass = (Class<BaseInputFormat>) Class.forName(classString); if (!Class.forName("org.apache.pirk.inputformat.hadoop.BaseInputFormat").isAssignableFrom(inputClass)) { throw new Exception("baseInputFormat class = " + classString + " does not extend BaseInputFormat"); } job.setInputFormatClass(inputClass); FileInputFormat.setInputPaths(job, inputFile); } job.setJarByClass(HashSelectorsAndPartitionDataMapper.class); job.setMapperClass(HashSelectorsAndPartitionDataMapper.class); 
job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(BytesArrayWritable.class); // Set the reducer and output params job.setNumReduceTasks(numReduceTasks); job.setReducerClass(RowCalcReducer.class); // Delete the output directory if it exists if (fs.exists(outPathInit)) { fs.delete(outPathInit, true); } job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, outPathInit); job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ","); MultipleOutputs.addNamedOutput(job, FileConst.PIR, TextOutputFormat.class, LongWritable.class, Text.class); // Submit job, wait for completion success = job.waitForCompletion(true); return success; }
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean multiplyColumns(Path outPathInit, Path outPathColumnMult)
        throws IOException, ClassNotFoundException, InterruptedException {
    boolean success;

    Job columnMultJob = Job.getInstance(conf, "pir_columnMult");
    columnMultJob.setSpeculativeExecution(false);

    String columnMultJobName = "pir_columnMult";

    // Set the same job configs as for the first iteration
    columnMultJob.getConfiguration().set("mapreduce.map.memory.mb",
            SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
    columnMultJob.getConfiguration().set("mapreduce.reduce.memory.mb",
            SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
    columnMultJob.getConfiguration().set("mapreduce.map.java.opts",
            SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
    columnMultJob.getConfiguration().set("mapreduce.reduce.java.opts",
            SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

    columnMultJob.getConfiguration().set("mapreduce.map.speculative", "false");
    columnMultJob.getConfiguration().set("mapreduce.reduce.speculative", "false");
    columnMultJob.getConfiguration().set("pirMR.queryInputDir",
            SystemConfiguration.getProperty("pir.queryInput"));

    columnMultJob.setJobName(columnMultJobName);
    columnMultJob.setJarByClass(ColumnMultMapper.class);
    columnMultJob.setNumReduceTasks(numReduceTasks);

    // Set the Mapper, InputFormat, and input path
    columnMultJob.setMapperClass(ColumnMultMapper.class);
    columnMultJob.setInputFormatClass(TextInputFormat.class);

    FileStatus[] status = fs.listStatus(outPathInit);
    for (FileStatus fstat : status) {
        if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
            logger.info("fstat.getPath() = " + fstat.getPath().toString());
            FileInputFormat.addInputPath(columnMultJob, fstat.getPath());
        }
    }

    columnMultJob.setMapOutputKeyClass(LongWritable.class);
    columnMultJob.setMapOutputValueClass(Text.class);

    // Set the reducer and output options
    columnMultJob.setReducerClass(ColumnMultReducer.class);
    columnMultJob.setOutputKeyClass(LongWritable.class);
    columnMultJob.setOutputValueClass(Text.class);
    columnMultJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

    // Delete the output file, if it exists
    if (fs.exists(outPathColumnMult)) {
        fs.delete(outPathColumnMult, true);
    }
    FileOutputFormat.setOutputPath(columnMultJob, outPathColumnMult);

    MultipleOutputs.addNamedOutput(columnMultJob, FileConst.PIR_COLS, TextOutputFormat.class,
            LongWritable.class, Text.class);

    // Submit job, wait for completion
    success = columnMultJob.waitForCompletion(true);

    return success;
}
From source file:org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool.java
License:Apache License
private boolean computeFinalResponse(Path outPathFinal)
        throws ClassNotFoundException, IOException, InterruptedException {
    boolean success;

    Job finalResponseJob = Job.getInstance(conf, "pir_finalResponse");
    finalResponseJob.setSpeculativeExecution(false);

    String finalResponseJobName = "pir_finalResponse";

    // Set the same job configs as for the first iteration
    finalResponseJob.getConfiguration().set("mapreduce.map.memory.mb",
            SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
    finalResponseJob.getConfiguration().set("mapreduce.reduce.memory.mb",
            SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
    finalResponseJob.getConfiguration().set("mapreduce.map.java.opts",
            SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
    finalResponseJob.getConfiguration().set("mapreduce.reduce.java.opts",
            SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

    finalResponseJob.getConfiguration().set("pirMR.queryInputDir",
            SystemConfiguration.getProperty("pir.queryInput"));
    finalResponseJob.getConfiguration().set("pirMR.outputFile", outputFile);

    finalResponseJob.getConfiguration().set("mapreduce.map.speculative", "false");
    finalResponseJob.getConfiguration().set("mapreduce.reduce.speculative", "false");

    finalResponseJob.setJobName(finalResponseJobName);
    finalResponseJob.setJarByClass(ColumnMultMapper.class);
    finalResponseJob.setNumReduceTasks(1);

    // Set the Mapper, InputFormat, and input path
    finalResponseJob.setMapperClass(ColumnMultMapper.class);
    finalResponseJob.setInputFormatClass(TextInputFormat.class);

    FileStatus[] status = fs.listStatus(new Path(outputDirColumnMult));
    for (FileStatus fstat : status) {
        if (fstat.getPath().getName().startsWith(FileConst.PIR_COLS)) {
            logger.info("fstat.getPath() = " + fstat.getPath().toString());
            FileInputFormat.addInputPath(finalResponseJob, fstat.getPath());
        }
    }

    finalResponseJob.setMapOutputKeyClass(LongWritable.class);
    finalResponseJob.setMapOutputValueClass(Text.class);

    // Set the reducer and output options
    finalResponseJob.setReducerClass(FinalResponseReducer.class);
    finalResponseJob.setOutputKeyClass(LongWritable.class);
    finalResponseJob.setOutputValueClass(Text.class);
    finalResponseJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

    // Delete the output file, if it exists
    if (fs.exists(outPathFinal)) {
        fs.delete(outPathFinal, true);
    }
    FileOutputFormat.setOutputPath(finalResponseJob, outPathFinal);

    MultipleOutputs.addNamedOutput(finalResponseJob, FileConst.PIR_FINAL, TextOutputFormat.class,
            LongWritable.class, Text.class);

    // Submit job, wait for completion
    success = finalResponseJob.waitForCompletion(true);

    return success;
}
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj", SequenceFileOutputFormat.class, NullWritable.class,
            SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class, Text.class,
            Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param intermediate True if this is intermediate data. Outputs
 *        to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Path outPath;
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    if (intermediate) {
        outPath = MRReasoningUtils.getOutputPath(conf,
                MRReasoningUtils.OUTPUT_BASE + iteration + MRReasoningUtils.TEMP_SUFFIX);
    } else {
        outPath = MRReasoningUtils.getOutputPath(conf, MRReasoningUtils.OUTPUT_BASE + iteration);
    }
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, SequenceFileOutputFormat.class,
            Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT, SequenceFileOutputFormat.class,
            Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT, SequenceFileOutputFormat.class,
            Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT, SequenceFileOutputFormat.class,
            Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class, Text.class,
            Text.class);
}
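Every example on this page shows only the registration half. At run time, a map or reduce task writes to the registered outputs through a MultipleOutputs instance, typically created in setup() and closed in cleanup(). A minimal sketch of that consuming side, reusing the hypothetical "stats" output from the sketch at the top of this page; the reducer class and types are illustrative assumptions, not taken from any project above.

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

    public class StatsReducer extends Reducer<Text, LongWritable, LongWritable, Text> {
        private MultipleOutputs<LongWritable, Text> mos;

        @Override
        protected void setup(Context context) {
            // Bound to whatever named outputs were registered on the job
            mos = new MultipleOutputs<>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable v : values) {
                sum += v.get();
            }
            // The first argument must match a name passed to addNamedOutput, and the
            // key/value must match the classes registered for that name.
            mos.write("stats", new LongWritable(sum), new Text(key));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            mos.close(); // flushes the underlying record writers
        }
    }

Forgetting mos.close() is a common source of empty or truncated named-output files, since the side outputs' record writers are only closed there.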