Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

On this page you can find example usages of the setOutputValueClass method of org.apache.hadoop.mapred.JobConf.

Prototype

public void setOutputValueClass(Class<?> theClass)

Source Link

Document

Set the value class for job outputs.

Usage

From source file:eu.stratosphere.hadoopcompatibility.mapred.record.HadoopDataSink.java

License:Apache License

/**
 * Creates a data sink that writes through the given Hadoop output format.
 *
 * <p>The Hadoop format, job configuration and converter are wrapped in a
 * {@code HadoopRecordOutputFormat} that is handed to the super constructor together with
 * {@code input} and {@code name}. The key and value classes are then registered on
 * {@code jobConf} as the job's output key/value classes.
 *
 * @param hadoopFormat the Hadoop output format to wrap; must not be null
 * @param jobConf      the job configuration to store and to update; must not be null
 * @param name         the name passed to the super constructor and stored on this sink
 * @param input        the input operators passed to the super constructor
 * @param conv         the converter passed to the wrapping output format
 * @param keyClass     the class registered as the job's output key class
 * @param valueClass   the class registered as the job's output value class
 */
@SuppressWarnings("deprecation")
public HadoopDataSink(OutputFormat<K, V> hadoopFormat, JobConf jobConf, String name,
        List<Operator<Record>> input, StratosphereTypeConverter<K, V> conv, Class<K> keyClass,
        Class<V> valueClass) {
    super(new HadoopRecordOutputFormat<K, V>(hadoopFormat, jobConf, conv), input, name);

    // The super(...) call has to come first, so these checks can only run after the
    // arguments have already been handed to the wrapping output format.
    Preconditions.checkNotNull(hadoopFormat);
    Preconditions.checkNotNull(jobConf);

    // Register the output types on the caller's configuration.
    jobConf.setOutputValueClass(valueClass);
    jobConf.setOutputKeyClass(keyClass);

    this.jobConf = jobConf;
    this.name = name;
}

From source file:eu.stratosphere.myriad.driver.hadoop.MyriadDriverHadoopJob.java

License:Apache License

/**
 * Builds the configuration of the map-only driver job.
 *
 * <p>The job is named after {@code parameters.getDGenName()}, reads through
 * {@code MyriadInputFormat}, passes records through {@code IdentityMapper} with zero reduce
 * tasks, and writes {@code NullWritable}/{@code Text} pairs with {@code TextOutputFormat} to
 * {@code parameters.getJobOutputPath()}.
 *
 * @return the configured job configuration
 */
private JobConf createJobConf() {
    final JobConf jobConf = new JobConf(getConf());

    // job identity
    jobConf.setJarByClass(MyriadDriverHadoopJob.class);
    jobConf.setJobName(String.format("%s", this.parameters.getDGenName()));

    // map-only pipeline: identity mapper, no reducers
    jobConf.setNumReduceTasks(0);
    jobConf.setMapperClass(IdentityMapper.class);

    // record types written by the job
    jobConf.setOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(NullWritable.class);

    // input side: format plus the driver parameters it needs
    jobConf.setInputFormat(MyriadInputFormat.class);
    MyriadInputFormat.setDriverJobParameters(jobConf, this.parameters);

    // output side: plain text files under the configured output path
    jobConf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(jobConf, new Path(this.parameters.getJobOutputPath()));

    return jobConf;
}

From source file:findstableweatherstate.FindStableWeatherState.java

/**
 * Configures three chained jobs and then runs them one after another, printing the
 * wall-clock duration of each in whole seconds to standard output.
 *
 * <ol>
 *   <li>Job 1 reads the station and readings inputs through {@code MultipleInputs}, reduces
 *       with {@code JoinReducer} and writes to {@code input/firstOutput}.</li>
 *   <li>Job 2 reads job 1's output, applies the min/max temperature mapper and reducer and
 *       writes to {@code input/secondOutput}.</li>
 *   <li>Job 3 reads job 2's output, applies the sort-state mapper and reducer and writes to
 *       {@code getOutputPath()}.</li>
 * </ol>
 *
 * @return always the empty string
 * @throws Exception if configuring or running any of the jobs fails
 */
public String call() throws Exception {
    final Path stageOneOutput = new Path("input/firstOutput");
    final Path stageTwoOutput = new Path("input/secondOutput");

    // ---- stage 1 ----
    final JobConf stageOne = new JobConf();
    stageOne.setJarByClass(getClass());
    stageOne.setJobName("invertedindex");
    stageOne.setOutputKeyClass(Text.class);
    stageOne.setOutputValueClass(Text.class);
    stageOne.setReducerClass(JoinReducer.class);
    MultipleInputs.addInputPath(stageOne, new Path(getInputPathStation()), TextInputFormat.class,
            StationMapper.class);
    MultipleInputs.addInputPath(stageOne, new Path(getInputPathReadings()), TextInputFormat.class,
            ReadingsMapper.class);
    FileOutputFormat.setOutputPath(stageOne, stageOneOutput);

    // ---- stage 2 ----
    final JobConf stageTwo = new JobConf();
    stageTwo.setJarByClass(getClass());
    stageTwo.setJobName("secondJob");
    stageTwo.setOutputKeyClass(Text.class);
    stageTwo.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stageTwo, stageOneOutput);
    stageTwo.setMapperClass(CalculateMinMaxTemperatureMapper.class);
    stageTwo.setReducerClass(CalculateMaxMinTemperatureReducer.class);
    // NOTE(review): the intermediate output path is only set when the final output path is
    // configured, although it does not depend on it - confirm this guard is intended.
    if (getOutputPath() != null) {
        FileOutputFormat.setOutputPath(stageTwo, stageTwoOutput);
    }

    // ---- stage 3 ----
    final JobConf stageThree = new JobConf();
    stageThree.setJarByClass(getClass());
    stageThree.setJobName("thirdJob");
    stageThree.setOutputKeyClass(Text.class);
    stageThree.setOutputValueClass(Text.class);
    stageThree.setMapOutputKeyClass(DoubleWritable.class);
    stageThree.setMapOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stageThree, stageTwoOutput);
    stageThree.setMapperClass(SortStateMapper.class);
    stageThree.setReducerClass(SortStateReducer.class);
    if (getOutputPath() != null) {
        FileOutputFormat.setOutputPath(stageThree, new Path(getOutputPath()));
    }

    // All three jobs are fully configured before the first one is started.
    runAndReportSeconds(stageOne, "******************** First Job : ");
    runAndReportSeconds(stageTwo, "******************** Second Job : ");
    runAndReportSeconds(stageThree, "******************** Third Job : ");

    return "";
}

/** Runs {@code jobConf} to completion and prints {@code reportPrefix} followed by the elapsed whole seconds. */
private void runAndReportSeconds(JobConf jobConf, String reportPrefix) throws Exception {
    final long begin = System.currentTimeMillis();
    JobClient.runJob(jobConf);
    final long elapsedMillis = System.currentTimeMillis() - begin;
    System.out.println(reportPrefix + elapsedMillis / 1000);
}

From source file:findstableweatherstate.FindStableWeatherState.java

/**
 * Applies the generated job settings to {@code conf}: text input and output formats,
 * {@code StationMapper} as the mapper, and {@code Text} for the map-output and job-output
 * key and value classes.
 *
 * <p>No reducer class and no reduce-task count are set here; both calls are commented out
 * below. The block sits between IDE "Generated Code" markers, so it is presumably
 * regenerated by the tool and manual edits may be overwritten - verify before changing it.
 *
 * @param conf the job configuration to populate
 */
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed"
// desc="Generated Code">//GEN-BEGIN:initJobConf
public static void initJobConf(JobConf conf) {
    // Generating code using Karmasphere Protocol for Hadoop 0.18
    // CG_GLOBAL

    // CG_INPUT_HIDDEN
    // Input format for the job.
    conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);

    // CG_MAPPER_HIDDEN
    conf.setMapperClass(StationMapper.class);

    // CG_MAPPER
    // Key class of the mapper output.
    conf.setMapOutputKeyClass(org.apache.hadoop.io.Text.class);

    //conf.setReducerClass(IndexReducer.class);

    // CG_REDUCER
    // conf.setNumReduceTasks(1);
    // Key class of the final job output.
    conf.setOutputKeyClass(Text.class);

    // CG_OUTPUT_HIDDEN
    // Output format for the job.
    conf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);

    // Value classes of the mapper output and of the final job output.
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputValueClass(Text.class);
    // CG_OUTPUT

    // Others
}

From source file:fire.util.fileformats.iomapred.LoadBinaryToSequence.java

License:Apache License

/**
 * Configures and runs the "loadbinarytosequence" job.
 *
 * <p>The job reads its input through {@code CombineFileBinaryInputFormat}, maps it with
 * {@code MapClass} and writes {@code Text}/{@code BytesWritable} pairs as a sequence file.
 *
 * @param args {@code args[0]} is handed to {@code FileInputFormat.addInputPaths} as the
 *             input path specification, {@code args[1]} is the output directory
 * @return {@code 2} after printing the usage text when fewer than two arguments are given,
 *         {@code 0} once the job has been run
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {
    final boolean hasRequiredArgs = args.length >= 2;
    if (!hasRequiredArgs) {
        printUsage();
        return 2;
    }

    final String inputSpec = args[0];
    final String outputDir = args[1];

    final JobConf jobConf = new JobConf(LoadBinaryToSequence.class);
    jobConf.setJobName("loadbinarytosequence");

    // Custom input format on the way in, sequence file on the way out.
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setInputFormat(CombineFileBinaryInputFormat.class);

    // Output records: string keys (Text) with binary image payloads (BytesWritable).
    jobConf.setOutputValueClass(BytesWritable.class);
    jobConf.setOutputKeyClass(Text.class);

    // Map-side logic is provided by MapClass.
    jobConf.setMapperClass(MapClass.class);

    FileInputFormat.addInputPaths(jobConf, inputSpec);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDir));

    JobClient.runJob(jobConf);
    return 0;
}

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Create a JobConf for a Job that will calculate the number of unique listeners per track.
 * /* www .jav a2 s  .c  o  m*/
 * @param inputDir The path to the folder containing the raw listening data files.
 * @return The unique listeners JobConf.
 */
private JobConf getUniqueListenersJobConf(Path inputDir) {
    log.info("Creating configuration for unique listeners Job");

    // output results to a temporary intermediate folder, this will get deleted by start() method
    Path uniqueListenersOutput = new Path("uniqueListeners");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(IntWritable.class); // number of unique listeners
    conf.setInputFormat(TextInputFormat.class); // raw listening data
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(UniqueListenersMapper.class);
    conf.setCombinerClass(UniqueListenersCombiner.class);
    conf.setReducerClass(UniqueListenersReducer.class);

    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, uniqueListenersOutput);
    conf.setJobName("uniqueListeners");
    return conf;
}

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Creates a JobConf for a Job that will sum up the TrackStatistics per track.
 * /*ww  w .  j  a v  a2 s  .  c o  m*/
 * @param inputDir The path to the folder containing the raw input data files.
 * @return The sum JobConf.
 */
private JobConf getSumJobConf(Path inputDir) {
    log.info("Creating configuration for sum job");
    // output results to a temporary intermediate folder, this will get deleted by start() method
    Path playsOutput = new Path("sum");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(TrackStats.class); // statistics for a track
    conf.setInputFormat(TextInputFormat.class); // raw listening data
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(SumMapper.class);
    conf.setCombinerClass(SumReducer.class);
    conf.setReducerClass(SumReducer.class);

    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, playsOutput);
    conf.setJobName("sum");
    return conf;
}

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Builds the configuration of the "merge" job, which combines the output of the sum job
 * with the output of the unique listeners job.
 *
 * <p>Both inputs are read as sequence files: the sum data passes through
 * {@code IdentityMapper}, the listener data through {@code MergeListenersMapper}. The
 * result is written as text to {@code outputPath}.
 *
 * @param outputPath The path for the results to be output to.
 * @param sumInputDir The path containing the data from the sum Job.
 * @param listenersInputDir The path containing the data from the unique listeners job.
 * @return The merge JobConf.
 */
private JobConf getMergeConf(Path outputPath, Path sumInputDir, Path listenersInputDir) {
    log.info("Creating configuration for merge job");

    final JobConf mergeConf = new JobConf(TrackStatisticsProgram.class);
    mergeConf.setJobName("merge");

    // Inputs: one mapper per upstream job, both reading sequence files.
    MultipleInputs.addInputPath(mergeConf, sumInputDir, SequenceFileInputFormat.class,
            IdentityMapper.class);
    MultipleInputs.addInputPath(mergeConf, listenersInputDir, SequenceFileInputFormat.class,
            MergeListenersMapper.class);

    // Aggregation: the reducer doubles as the combiner.
    mergeConf.setReducerClass(SumReducer.class);
    mergeConf.setCombinerClass(SumReducer.class);

    // Output: track id mapped to overall track statistics, written as text.
    mergeConf.setOutputValueClass(TrackStats.class);
    mergeConf.setOutputKeyClass(IntWritable.class);
    mergeConf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(mergeConf, outputPath);

    return mergeConf;
}

From source file:FormatStorage1.MergeFileUtil.java

License:Open Source License

/**
 * Submits a map-only job that merges the files under {@code inputdir} into
 * {@code outputdir}, then polls the job once per second and prints its progress to
 * standard error until it completes.
 *
 * <p>The header of the first listed input file is stored in the job configuration under
 * {@code ifdf.head.info} before submission.
 *
 * <p>Failures while monitoring the submitted job are logged, not rethrown. If the calling
 * thread is interrupted while waiting, its interrupt status is restored and the method
 * returns without waiting for the job to finish.
 *
 * @param inputdir  directory whose files are merged
 * @param outputdir directory the job writes to; must not exist yet
 * @param conf      base configuration the job configuration is derived from
 * @throws IOException if {@code outputdir} already exists, {@code inputdir} lists no
 *                     files, or setting up or submitting the job fails
 */
public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);

    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    // Publish the header of the first input file through the job configuration. The file is
    // closed even if reading the header fails.
    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    try {
        job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    } finally {
        ifdf.close();
    }

    // Map-only job: no reduce phase.
    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);

    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        // HH = 24-hour clock; the previous "hh" pattern printed ambiguous 12-hour times
        // without an AM/PM marker.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            // Print when the progress changed, or at the latest every maxReportInterval ms.
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());

    } catch (IOException e) {
        // Monitoring is best effort: the job has been submitted, so report and carry on.
        LOG.error("Failed to monitor job MergeFileUtil", e);
    } catch (InterruptedException e) {
        // Preserve the interrupt for the caller instead of silently clearing it.
        Thread.currentThread().interrupt();
        LOG.warn("Interrupted while waiting for job MergeFileUtil", e);
    }
}

From source file:FormatStorage1.MergeFileUtil.java

License:Open Source License

/**
 * Submits a map/reduce job that merges the files under {@code inputdir} into
 * {@code outputdir}, then polls the job once per second and prints its progress to
 * standard error until it completes.
 *
 * <p>The number of reduce tasks is one per started 512 MiB of total input size. The header
 * of the first listed input file is stored in the job configuration under
 * {@code ifdf.head.info} before submission.
 *
 * <p>Failures while monitoring the submitted job are logged, not rethrown. If the calling
 * thread is interrupted while waiting, its interrupt status is restored and the method
 * returns without waiting for the job to finish.
 *
 * @param inputdir  directory whose files are merged; must not contain sub-directories
 * @param outputdir directory the job writes to; must not exist yet
 * @param conf      base configuration the job configuration is derived from
 * @throws IOException if {@code outputdir} already exists, {@code inputdir} lists no files
 *                     or contains a directory, or setting up or submitting the job fails
 */
public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);

    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    // Only a flat directory of files is accepted.
    for (FileStatus status : fss) {
        if (status.isDir()) {
            throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString());
        }
    }

    // Publish the header of the first input file through the job configuration. The file is
    // closed even if reading the header fails.
    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    try {
        job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    } finally {
        ifdf.close();
    }

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    // One reducer per started 512 MiB of input.
    long fl = 512L * 1024 * 1024;
    int reduces = (int) (wholesize / fl + 1);
    job.setNumReduceTasks(reduces);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setInputFormat(MergeIFormatInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        // HH = 24-hour clock; the previous "hh" pattern printed ambiguous 12-hour times
        // without an AM/PM marker.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            // Print when the progress changed, or at the latest every maxReportInterval ms.
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());

    } catch (IOException e) {
        // Monitoring is best effort: the job has been submitted, so report and carry on.
        LOG.error("Failed to monitor job MergeFileUtil", e);
    } catch (InterruptedException e) {
        // Preserve the interrupt for the caller instead of silently clearing it.
        Thread.currentThread().interrupt();
        LOG.warn("Interrupted while waiting for job MergeFileUtil", e);
    }
}