List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath
public static void setOutputPath(Job job, Path outputDir)
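Before the real-world examples below, here is a minimal sketch of the call pattern. The driver class name and the use of Hadoop's default identity Mapper/Reducer are illustrative assumptions, not taken from any of the source files listed here. Note that the output directory must not exist when the job is submitted, which is why most of the examples below delete it before running.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver: copies text input to text output via the default
// identity Mapper/Reducer, showing where setOutputPath fits in job setup.
public class SetOutputPathSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setOutputPath-sketch");
        job.setJarByClass(SetOutputPathSketch.class);
        // Defaults: TextInputFormat offsets/lines pass through unchanged.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // Records the output directory in the job configuration; the
        // directory must not already exist or submission fails.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}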
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     * Merge the source files into one.
     * Should change the directories of each file before executing the program.
     **********************************************************************************/
    // String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    // String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    // File resultFile = new File(resultFileDirectory);
    // if (!resultFile.exists())
    //     new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/
    Configuration conf = new Configuration();

    // Compress intermediate map output with Snappy.
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable profiling
    // conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;
    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {
        /*
         * Mandatory commands
         */
        // Extract the input path from the command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract the output path from the command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract the test data path from the command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional commands
         */
        // Extract the number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether the job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether the job uses the mean or the median. [Default: mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    // Test sequence
    // String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    // Sequence testSeq = new Sequence(testSeqString);
    // LinkedList<Sequence> testSequences = new LinkedList<>();
    // testSequences.add(testSeq);

    for (Sequence seq : testSequences) {
        /* ******************** Hadoop Launch *********************** */
        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think of another way to implement the combiner class;
        // the current implementation is not helpful to the job.
        // job.setCombinerClass(Combiner.class);

        // Set the number of reduce tasks to 1 to keep the 100 nearest neighbors in a sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * When the job finishes, read its result and store it in results.
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * When all jobs finish, store their results in the output/result.csv file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV file header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.kxen.han.projection.giraph.BspCase.java
License:Apache License
/**
 * Helper method to remove an old output directory if it exists,
 * and set the output path for any VertexOutputFormat that uses
 * FileOutputFormat.
 *
 * @param job Job to set the output path for
 * @param outputPath Path to output
 * @throws IOException
 */
public static void removeAndSetOutput(GiraphJob job, Path outputPath) throws IOException {
    FileUtils.deletePath(job.getConfiguration(), outputPath);
    FileOutputFormat.setOutputPath(job.getInternalJob(), outputPath);
}
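A hedged sketch of how a caller might use this helper, assuming Giraph's GiraphJob(Configuration, String) constructor and its blocking run(boolean) submit; the job name and output path are placeholders, not from BspCase itself:

// Hypothetical caller: clear any stale output, then submit the Giraph job.
Configuration conf = new Configuration();
GiraphJob job = new GiraphJob(conf, "projection-sketch");
removeAndSetOutput(job, new Path("/tmp/projection-out"));
if (!job.run(true)) {
    throw new IllegalStateException("Giraph job failed");
}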
From source file:com.kylinolap.job.hadoop.cube.CubeHFileJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        Configuration conf = HBaseConfiguration.create(getConf());
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        HTable htable = new HTable(conf, tableName);

        // Automatic config!
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        // set block replication to 3 for hfiles
        conf.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.CuboidJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_NCUBOID_LEVEL);
        options.addOption(OPTION_INPUT_FORMAT);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        int nCuboidLevel = Integer.parseInt(getOptionValue(OPTION_NCUBOID_LEVEL));
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        System.out.println("Starting: " + job.getJobName());
        FileInputFormat.setInputPaths(job, input);

        File jarFile = new File(config.getKylinJobJarPath());
        if (jarFile.exists()) {
            job.setJar(config.getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        // Mapper
        if (this.mapperClass == null) {
            throw new Exception("Mapper class is not set!");
        }

        boolean isInputTextFormat = false;
        if (hasOption(OPTION_INPUT_FORMAT)
                && ("textinputformat".equalsIgnoreCase(getOptionValue(OPTION_INPUT_FORMAT)))) {
            isInputTextFormat = true;
        }

        if (isInputTextFormat) {
            job.setInputFormatClass(TextInputFormat.class);
        } else {
            job.setInputFormatClass(SequenceFileInputFormat.class);
        }
        job.setMapperClass(this.mapperClass);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // for base cuboid shuffle skew, some rowkeys aggregate far more records than others
        job.setCombinerClass(CuboidReducer.class);

        // Reducer
        job.setReducerClass(CuboidReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileOutputFormat.setOutputPath(job, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        setReduceTaskNum(job, config, cubeName, nCuboidLevel);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsJob.java
License:Apache License
private void setupReduceOutput(Path output) throws IOException {
    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.OUTPUT_PATH, output.toString());

    job.setNumReduceTasks(1);

    deletePath(job.getConfiguration(), output);
}
From source file:com.kylinolap.job.hadoop.cube.MergeCuboidJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME).toUpperCase();
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        // CubeSegment cubeSeg = cubeMgr.findSegment(cube, segmentName);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        System.out.println("Starting: " + jobName);
        job = Job.getInstance(getConf(), jobName);

        // set job configuration - basic
        File JarFile = new File(config.getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(config.getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        // setJobJar(job);
        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(MergeCuboidMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer - only one
        job.setReducerClass(CuboidReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        setReduceTaskNum(job, config, cubeName, 0);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.RangeKeyDistributionJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        // job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RangeKeyDistributionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RangeKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeCapacity cubeCapacity = cube.getDescriptor().getCapacity();
        job.getConfiguration().set(BatchConstants.CUBE_CAPACITY, cubeCapacity.toString());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(rowKeyStatsFilePath);
        parseOptions(options, args);

        String statsFilePath = getOptionValue(rowKeyStatsFilePath);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RowKeyDistributionCheckerMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RowKeyDistributionCheckerReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        job.getConfiguration().set("rowKeyStatsFilePath", statsFilePath);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IICreateHFileJob.java
License:Apache License
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        File JarFile = new File(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        if (JarFile.exists()) {
            job.setJar(KylinConfig.getInstanceFromEnv().getKylinJobJarPath());
        } else {
            job.setJarByClass(this.getClass());
        }

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(IICreateHFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        String tableName = getOptionValue(OPTION_HTABLE_NAME);
        HTable htable = new HTable(getConf(), tableName);
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}
From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsJob.java
License:Apache License
private void setupReduceOutput(Path output) throws IOException {
    job.setReducerClass(IIDistinctColumnsReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.OUTPUT_PATH, output.toString());

    job.setNumReduceTasks(1);

    deletePath(job.getConfiguration(), output);
}