List of usage examples for org.apache.hadoop.mapred JobConf setInputFormat
public void setInputFormat(Class<? extends InputFormat> theClass)
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java
License:Apache License
public int run(String[] args) throws Exception { long startTime = System.currentTimeMillis(); if (args.length < 5) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }//from w w w . j a v a 2 s. c o m String dateFrom = args[0]; String dateTo = args[1]; String rankingsInputDir = args[2]; String userVisitsInputDir = args[3]; String outputDir = args[4]; // output path (delete) Path outputPath = new Path(outputDir); HDFSUtil.deletePath(outputPath); // phase 1 JobConf conf1 = new JobConf(this.getClass()); conf1.setJobName("join_hdfs_phase1"); Path p1Output = new Path(outputDir + "/phase1"); FileOutputFormat.setOutputPath(conf1, p1Output); conf1.setInputFormat(TextInputFormat.class); conf1.setOutputFormat(TextOutputFormat.class); conf1.setOutputKeyClass(Text.class); conf1.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir)); conf1.set(DATE_FROM_PARAM, dateFrom); conf1.set(DATE_TO_PARAM, dateTo); conf1.setMapperClass(Phase1Map.class); conf1.setReducerClass(Phase1Reduce.class); // conf1.setPartitionerClass(theClass) RunningJob job1 = JobClient.runJob(conf1); if (job1.isSuccessful()) { // phase 2 JobConf conf2 = new JobConf(this.getClass()); conf2.setJobName("join_hdfs_phase2"); conf2.setInputFormat(KeyValueTextInputFormat.class); conf2.setOutputFormat(TextOutputFormat.class); conf2.setOutputKeyClass(Text.class); conf2.setOutputValueClass(Text.class); conf2.setMapperClass(IdentityMapper.class); conf2.setReducerClass(Phase2Reduce.class); Path p2Output = new Path(outputDir + "/phase2"); FileOutputFormat.setOutputPath(conf2, p2Output); FileInputFormat.setInputPaths(conf2, p1Output); RunningJob job2 = JobClient.runJob(conf2); if (job2.isSuccessful()) { // phase 3 JobConf conf3 = new JobConf(this.getClass()); conf3.setJobName("join_hdfs_phase3"); conf3.setNumReduceTasks(1); conf3.setInputFormat(KeyValueTextInputFormat.class); conf3.setOutputKeyClass(Text.class); 
conf3.setOutputValueClass(Text.class); conf3.setMapperClass(IdentityMapper.class); conf3.setReducerClass(Phase3Reduce.class); Path p3Output = new Path(outputDir + "/phase3"); FileOutputFormat.setOutputPath(conf3, p3Output); FileInputFormat.setInputPaths(conf3, p2Output); RunningJob job3 = JobClient.runJob(conf3); if (!job3.isSuccessful()) { System.out.println("PHASE 3 FAILED!!!"); } } else { System.out.println("PHASE 2 FAILED!!!"); } } else { System.out.println("PHASE 1 FAILED!!!"); } long endTime = System.currentTimeMillis(); System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n"); return 0; }
From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(getConf(), this.getClass()); conf.setJobName("selection_hdfs"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from w w w . jav a 2 s . com conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 3) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); } conf.set(PAGE_RANK_VALUE_PARAM, args[0]); FileInputFormat.setInputPaths(conf, new Path(args[1])); // OUTPUT properties Path outputPath = new Path(args[2]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("udf_agg_hdfs"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(LongWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(LongSumReducer.class); conf.setReducerClass(LongSumReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*from w w w.j a v a2 s. co m*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(getConf(), this.getClass()); conf.setJobName("GlobalHasher"); conf.setMapOutputKeyClass(UnsortableInt.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(GlobalHasher.Map.class); conf.setReducerClass(GlobalHasher.Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 5) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*ww w .ja v a2s . c om*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); int partNo = Integer.parseInt(args[2]); conf.setNumReduceTasks(partNo); conf.set(DELIMITER_PARAM, args[3]); int hashFieldPos = Integer.parseInt(args[4]); conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos); return conf; }
From source file:edu.yale.cs.hadoopdb.exec.DBJobBase.java
License:Apache License
/**
 * Installs the input format on the given job configuration.
 *
 * <p>This implementation uses {@code DBJobBaseInputFormat}; the method is
 * {@code protected}, so a subclass can substitute a different format.
 *
 * @param conf the job configuration to update
 */
protected void setInputFormat(JobConf conf) {
    conf.setInputFormat(DBJobBaseInputFormat.class);
}
From source file:eu.scape_project.tb.chutney.ChutneyDriver.java
License:Apache License
/** * This method sets up and runs the job on Hadoop * @param args The passed through command line arguments *///from w ww . ja v a2s.c om public int run(String[] args) { CommandLineParser parser = new PosixParser(); Options options = new Options(); options.addOption("n", "jobname", true, "name to assign to the hadoop job"); options.addOption("i", "inputlist", true, "text file containing list of input files (ensure no trailing carriage returns)"); options.addOption("t", "jobtype", true, "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), XML (XML defined command line job), XWR (XML workflow report)"); options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs"); options.addOption("h", "help", false, "help text"); JobConf conf = new JobConf(ChutneyDriver.class); String input = null; String xmlcode = null; CommandLine com; try { com = parser.parse(options, args); if (com.hasOption("help")) { throw (new ParseException("")); } String jobName = Settings.JOB_NAME + "default"; if (com.hasOption("jobname")) { //set the job name to something better than the default jobName = Settings.JOB_NAME + com.getOptionValue("jobname"); } conf.setJobName(jobName); JobType jobType = JobType.CommandLineJob; if (com.hasOption("jobtype")) { String value = com.getOptionValue("jobtype").toUpperCase(); if (value.equals(CommandLineJob.getShortJobType())) { jobType = CommandLineJob.getJobType(); } else if (value.equals(TavernaCommandLineJob.getShortJobType())) { jobType = TavernaCommandLineJob.getJobType(); } else if (value.equals(TavernaServerJob.getShortJobType())) { jobType = TavernaServerJob.getJobType(); } else if (value.equals(XMLCommandLineJob.getShortJobType())) { jobType = XMLCommandLineJob.getJobType(); } else if (value.equals(XMLWorkflowReport.getShortJobType())) { jobType = XMLWorkflowReport.getJobType(); } } System.out.println("JobType: " + jobType.toString()); conf.set(Settings.JOBTYPE_CONF_SETTING, 
jobType.toString()); if (com.hasOption("xmlcode")) { //jobType == JobType.XMLCommandLineJob xmlcode = com.getOptionValue("xmlcode"); //if it is a local file get the full path if (new File(xmlcode).exists()) xmlcode = new File(xmlcode).getAbsolutePath(); conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode); } if ((jobType == JobType.XMLCommandLineJob) & (xmlcode == null)) { //i.e. no code specified System.out.println("No XML code specified on the command line"); return -1; } if (com.hasOption("inputlist")) { input = com.getOptionValue("inputlist"); } if (input.equals(null)) { System.out.println("no input given"); return -2; } } catch (ParseException e) { HelpFormatter help = new HelpFormatter(); help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options); return -1; } //using matchbox it may take a while to process the jobs //set a longer timeout than the default (10 mins) //six hours should be more than enough :/ MMM*SS*MS //QAJob testing for 9 tests on ANJO files can take ~4.5hrs+ conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000)); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName())); //set the mapper to this class' mapper conf.setMapperClass(Chutney.class); //we don't want to reduce //conf.setReducerClass(Reducer.class); //this input format should split the input by one line per map by default. conf.setInputFormat(NLineInputFormat.class); conf.setInt("mapred.line.input.format.linespermap", 1); //sets how the output is written cf. OutputFormat //we can use nulloutputformat if we are writing our own output conf.setOutputFormat(TextOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); //this sets maximum jvm reuse conf.set("mapred.job.reuse.jvm.num.tasks", "-1"); //we only want one reduce task conf.setNumReduceTasks(1); try { JobClient.runJob(conf); } catch (IOException ioe) { ioe.printStackTrace(); return -1; } return 0; }
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadDriverHadoopJob.java
License:Apache License
private JobConf createJobConf() { // create job JobConf conf = new JobConf(getConf()); conf.setJarByClass(MyriadDriverHadoopJob.class); conf.setJobName(String.format("%s", this.parameters.getDGenName())); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(IdentityMapper.class); conf.setNumReduceTasks(0);// ww w. java2s. c om conf.setInputFormat(MyriadInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); // input format configuration MyriadInputFormat.setDriverJobParameters(conf, this.parameters); // output format configuration FileOutputFormat.setOutputPath(conf, new Path(this.parameters.getJobOutputPath())); return conf; }
From source file:findstableweatherstate.FindStableWeatherState.java
@SuppressWarnings("unchecked") // <editor-fold defaultstate="collapsed" // desc="Generated Code">//GEN-BEGIN:initJobConf public static void initJobConf(JobConf conf) { // Generating code using Karmasphere Protocol for Hadoop 0.18 // CG_GLOBAL//from w w w.j a v a 2 s. com // CG_INPUT_HIDDEN conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); // CG_MAPPER_HIDDEN conf.setMapperClass(StationMapper.class); // CG_MAPPER conf.setMapOutputKeyClass(org.apache.hadoop.io.Text.class); //conf.setReducerClass(IndexReducer.class); // CG_REDUCER // conf.setNumReduceTasks(1); conf.setOutputKeyClass(Text.class); // CG_OUTPUT_HIDDEN conf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); conf.setMapOutputValueClass(Text.class); conf.setOutputValueClass(Text.class); // CG_OUTPUT // Others }
From source file:fire.util.fileformats.iomapred.LoadBinaryToSequence.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage();//from w w w. java2s.c om return 2; } JobConf conf = new JobConf(LoadBinaryToSequence.class); conf.setJobName("loadbinarytosequence"); //set the InputFormat of the job to our InputFormat conf.setInputFormat(CombineFileBinaryInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); // the keys are words (strings) conf.setOutputKeyClass(Text.class); // the values are images conf.setOutputValueClass(BytesWritable.class); //use the defined mapper conf.setMapperClass(MapClass.class); FileInputFormat.addInputPaths(conf, args[0]); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java
License:Apache License
/** * Create a JobConf for a Job that will calculate the number of unique listeners per track. * /*ww w . j a v a 2 s .co m*/ * @param inputDir The path to the folder containing the raw listening data files. * @return The unique listeners JobConf. */ private JobConf getUniqueListenersJobConf(Path inputDir) { log.info("Creating configuration for unique listeners Job"); // output results to a temporary intermediate folder, this will get deleted by start() method Path uniqueListenersOutput = new Path("uniqueListeners"); JobConf conf = new JobConf(TrackStatisticsProgram.class); conf.setOutputKeyClass(IntWritable.class); // track id conf.setOutputValueClass(IntWritable.class); // number of unique listeners conf.setInputFormat(TextInputFormat.class); // raw listening data conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(UniqueListenersMapper.class); conf.setCombinerClass(UniqueListenersCombiner.class); conf.setReducerClass(UniqueListenersReducer.class); FileInputFormat.addInputPath(conf, inputDir); FileOutputFormat.setOutputPath(conf, uniqueListenersOutput); conf.setJobName("uniqueListeners"); return conf; }