Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass)

Source Link

Document

Set the OutputFormat implementation for the map-reduce job.

Usage

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Create a JobConf for a Job that will calculate the number of unique listeners per track.
 * //from  www .  j a  v  a  2 s  . c  o  m
 * @param inputDir The path to the folder containing the raw listening data files.
 * @return The unique listeners JobConf.
 */
private JobConf getUniqueListenersJobConf(Path inputDir) {
    log.info("Creating configuration for unique listeners Job");

    // output results to a temporary intermediate folder, this will get deleted by start() method
    Path uniqueListenersOutput = new Path("uniqueListeners");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(IntWritable.class); // number of unique listeners
    conf.setInputFormat(TextInputFormat.class); // raw listening data
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(UniqueListenersMapper.class);
    conf.setCombinerClass(UniqueListenersCombiner.class);
    conf.setReducerClass(UniqueListenersReducer.class);

    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, uniqueListenersOutput);
    conf.setJobName("uniqueListeners");
    return conf;
}

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Creates a JobConf for a Job that will sum up the TrackStatistics per track.
 * //from  ww  w.j  ava2 s  .c  o  m
 * @param inputDir The path to the folder containing the raw input data files.
 * @return The sum JobConf.
 */
private JobConf getSumJobConf(Path inputDir) {
    log.info("Creating configuration for sum job");
    // output results to a temporary intermediate folder, this will get deleted by start() method
    Path playsOutput = new Path("sum");

    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(TrackStats.class); // statistics for a track
    conf.setInputFormat(TextInputFormat.class); // raw listening data
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapperClass(SumMapper.class);
    conf.setCombinerClass(SumReducer.class);
    conf.setReducerClass(SumReducer.class);

    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, playsOutput);
    conf.setJobName("sum");
    return conf;
}

From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java

License:Apache License

/**
 * Creates a JobConf for a Job that will merge the unique listeners and track statistics.
 * /*from ww w . j  av  a 2  s  .co  m*/
 * @param outputPath The path for the results to be output to.
 * @param sumInputDir The path containing the data from the sum Job.
 * @param listenersInputDir The path containing the data from the unique listeners job.
 * @return The merge JobConf.
 */
private JobConf getMergeConf(Path outputPath, Path sumInputDir, Path listenersInputDir) {
    log.info("Creating configuration for merge job");
    JobConf conf = new JobConf(TrackStatisticsProgram.class);
    conf.setOutputKeyClass(IntWritable.class); // track id
    conf.setOutputValueClass(TrackStats.class); // overall track statistics
    conf.setCombinerClass(SumReducer.class); // safe to re-use reducer as a combiner here
    conf.setReducerClass(SumReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(conf, outputPath);

    MultipleInputs.addInputPath(conf, sumInputDir, SequenceFileInputFormat.class, IdentityMapper.class);
    MultipleInputs.addInputPath(conf, listenersInputDir, SequenceFileInputFormat.class,
            MergeListenersMapper.class);
    conf.setJobName("merge");
    return conf;
}

From source file:FormatStorage1.MergeFileUtil.java

License:Open Source License

public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);
    FileSystem fs = null;//from   w  ww  .j a  v  a  2 s .  c  om
    fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));

    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);

    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {

        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());

    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:FormatStorage1.MergeFileUtil.java

License:Open Source License

public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);
    FileSystem fs = null;//from www  .  ja  v  a  2  s  .  c  o  m
    fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));

    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    for (FileStatus status : fss) {
        if (status.isDir()) {
            throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString());
        }
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    long fl = 512 * 1024 * 1024;
    int reduces = (int) (wholesize / fl + 1);
    job.setNumReduceTasks(reduces);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setInputFormat(MergeIFormatInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {

        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());

    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:FormatStorage1.MergeFileUtil1.java

License:Open Source License

public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil1");
    job.setJarByClass(MergeFileUtil1.class);
    FileSystem fs = null;/*from   www .  j av a2 s.  co  m*/
    fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));

    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);

    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat1.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {

        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());

    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:FormatStorage1.MergeFileUtil1.java

License:Open Source License

public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil1.class);
    FileSystem fs = null;/*w w w . j av a2s.  co  m*/
    fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));

    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    for (FileStatus status : fss) {
        if (status.isDir()) {
            throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString());
        }
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    long fl = 512 * 1024 * 1024;
    int reduces = (int) (wholesize / fl + 1);
    job.setNumReduceTasks(reduces);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setInputFormat(MergeIFormatInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {

        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());

    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:fr.worf.hadoop.scratchpad.Scratch2MapReduce.java

License:Apache License

/**
 * @param args the command line arguments
 * @throws java.io.IOException//from ww  w.  j a v  a  2s  .c  om
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(Scratch2MapReduce.class);
    job.setJobName("wordcount");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("/home/slash/test/testfile1.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/home/slash/test/testfile2.txt"));

    RunningJob runJob = JobClient.runJob(job);
}

From source file:gov.nasa.jpl.memex.pooledtimeseries.GradientTimeSeries.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapred.reduce.tasks", "0");
    JobConf conf = new JobConf(baseConf, GradientTimeSeries.class);

    conf.setJobName("gradient_time_series");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(MultiFileOutput.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);//  ww w .ja v a 2  s  . com
}

From source file:gov.nasa.jpl.memex.pooledtimeseries.OpticalTimeSeries.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
    LOG.info("Loaded- " + Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapred.reduce.tasks", "0");
    JobConf conf = new JobConf(baseConf, OpticalTimeSeries.class);

    conf.setJobName("optical_time_series");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(MultiFileOutput.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);/*from  w  w w  . j  av a 2  s.c  o  m*/
}