Example usage for org.apache.hadoop.mapred JobConf setOutputKeyClass

List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.JobConf method setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) 

Source Link

Document

Set the key class for the job output data.

Usage

From source file:hitune.analysis.mapreduce.processor.HistoryLog.java

License:Apache License

/**
 * Configures and runs the HistoryLog analysis job using the old
 * {@code org.apache.hadoop.mapred} API.
 *
 * <p>The map output key and value classes are loaded by name from the job
 * configuration. The job emits {@code Text} keys and {@code TextArrayWritable}
 * values through {@code CSVFileOutputFormat} into a temporary output directory,
 * after which {@code moveResults} is called with the final and temporary paths.
 *
 * <p>No exception is propagated: every failure is logged together with its
 * stack trace. An {@code IOException} raised while running the job or moving
 * the results additionally sets {@code MOVE_DONE} to {@code false}.
 */
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HistoryLog.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HistoryLog.MapClass.class);
        conf.setReducerClass(HistoryLog.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        // The intermediate (map output) types are configurable, so they are resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            // The job writes to the temporary directory; moveResults receives both locations.
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // Pass the throwable to the logger so the stack trace lands in the log, not on stderr.
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        // Covers missing/unloadable map output classes and any other setup error.
        log.warn("Job preparation failure!", e);
    }
}

From source file:hitune.analysis.mapreduce.processor.InstrumentDataflow.java

License:Apache License

/**
 * Configures and runs the InstrumentDataflow analysis job using the old
 * {@code org.apache.hadoop.mapred} API.
 *
 * <p>The map output key and value classes are loaded by name from the job
 * configuration. The job emits {@code Text} keys and {@code TextArrayWritable}
 * values through {@code CSVFileOutputFormat} into a temporary output directory,
 * after which {@code moveResults} is called with the final and temporary paths.
 *
 * <p>No exception is propagated: every failure is logged together with its
 * stack trace. An {@code IOException} raised while running the job or moving
 * the results additionally sets {@code MOVE_DONE} to {@code false}.
 */
@Override
public void run() {
    long timestamp = System.currentTimeMillis();

    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentDataflow.MapClass.class);
        conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        // The intermediate (map output) types are configurable, so they are resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            // The job writes to the temporary directory; moveResults receives both locations.
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // Pass the throwable to the logger so the stack trace lands in the log, not on stderr.
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        // Covers missing/unloadable map output classes and any other setup error.
        log.warn("Job preparation failure!", e);
    }
}

From source file:hitune.analysis.mapreduce.processor.InstrumentSamplingTop.java

License:Apache License

/**
 * Runs the two chained InstrumentSamplingTop jobs.
 *
 * <p>The first job maps and reduces the input files into an intermediate
 * sequence-file directory named {@code <report path>_1_<timestamp>}. The second
 * job reads that directory with an {@code IdentityMapper}, reduces it with
 * {@code TopClass}, and writes {@code Text}/{@code TextArrayWritable} records
 * through {@code CSVFileOutputFormat} into a temporary directory, after which
 * {@code moveResults} is called.
 *
 * <p>The intermediate directory is removed whether or not the jobs succeed.
 * A failure of that cleanup is only logged; it does not mark the run as failed.
 * No exception is propagated: an {@code IOException} from either job or from
 * {@code moveResults} is logged and sets {@code MOVE_DONE} to {@code false}.
 */
@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    try {
        JobConf conf = new JobConf(this.conf, InstrumentSamplingTop.class);
        conf.setJobName(this.getClass().getSimpleName() + "_1_" + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentSamplingTop.MapClass.class);
        conf.setReducerClass(InstrumentSamplingTop.ReduceClass.class);

        // The key/value types are configurable, so they are resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        // The first job's final output uses the same types because the second job consumes it.
        conf.setOutputKeyClass(outputKeyClass);
        conf.setOutputValueClass(outputValueClass);

        conf.setOutputFormat(SequenceFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);

        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);

            // Hand-off directory between the first and the second job.
            String intermediatePaths = outputPaths + "_1_" + timestamp;

            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(intermediatePaths));

            try {
                // First job: aggregate the raw input into the intermediate directory.
                JobClient.runJob(conf);

                JobConf secondconf = new JobConf(this.conf, InstrumentSamplingTop.class);
                secondconf.setJobName(this.getClass().getSimpleName() + "_2_" + timestamp);
                secondconf.setInputFormat(SequenceFileInputFormat.class);
                secondconf.setMapperClass(IdentityMapper.class);
                secondconf.setReducerClass(InstrumentSamplingTop.TopClass.class);

                secondconf.setMapOutputKeyClass(outputKeyClass);
                secondconf.setMapOutputValueClass(outputValueClass);

                secondconf.setOutputKeyClass(Text.class);
                secondconf.setOutputValueClass(TextArrayWritable.class);
                secondconf.setOutputFormat(CSVFileOutputFormat.class);
                FileInputFormat.setInputPaths(secondconf, intermediatePaths);
                FileOutputFormat.setOutputPath(secondconf, new Path(temp_outputPaths));

                // Second job: produce the ranking list.
                JobClient.runJob(secondconf);
                moveResults(secondconf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            } finally {
                // Always drop the intermediate data so a failed run does not leave it behind.
                try {
                    Path temp = new Path(intermediatePaths);
                    temp.getFileSystem(conf).delete(temp, true);
                } catch (IOException e) {
                    log.warn("For " + getOutputFileName() + " :failed to delete intermediate output "
                            + intermediatePaths, e);
                }
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        // Covers missing/unloadable key/value classes and any other setup error.
        log.warn("Job preparation failure!", e);
    }

}

From source file:hitune.analysis.mapreduce.processor.SystemLog.java

License:Apache License

/**
 * Configures and runs the SystemLog analysis job using the old
 * {@code org.apache.hadoop.mapred} API.
 *
 * <p>The map output key and value classes are loaded by name from the job
 * configuration. The job emits {@code Text} keys and {@code TextArrayWritable}
 * values through {@code CSVFileOutputFormat} into a temporary output directory,
 * after which {@code moveResults} is called with the final and temporary paths.
 *
 * <p>No exception is propagated: every failure is logged together with its
 * stack trace. An {@code IOException} raised while running the job or moving
 * the results additionally sets {@code MOVE_DONE} to {@code false}.
 */
@Override
public void run() {
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, SystemLog.class);

    try {
        conf.setJobName(this.getClass().getSimpleName() + timestamp);

        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(SystemLog.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);

        // The intermediate (map output) types are configurable, so they are resolved reflectively.
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            // The job writes to the temporary directory; moveResults receives both locations.
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // Pass the throwable to the logger so the stack trace lands in the log, not on stderr.
                log.warn("For " + getOutputFileName() + " :JOB fails!", e);
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }

    } catch (Exception e) {
        // Covers missing/unloadable map output classes and any other setup error.
        log.warn("Job preparation failure!", e);
    }

}

From source file:IndexService.IndexMergeMR.java

License:Open Source License

/**
 * Configures and submits the "MergeIndexMR" job, which runs with a single
 * reducer over the given index files.
 *
 * <p>Before submission the output directory is deleted recursively, the file
 * names recorded in every input file's header are collected (sorted, without
 * duplicates) into {@code ConstVar.HD_index_filemap}, and the field map and
 * data file type of the first input file are copied into the job
 * configuration.
 *
 * <p>If any of that preparation fails, the error is printed and no job is
 * submitted, because the job would otherwise run with an incomplete
 * configuration.
 *
 * @param inputfiles comma-separated list of input index files
 * @param outputdir  job output directory; deleted recursively before the job is configured
 * @param conf       base configuration the job configuration is created from
 * @return the submitted job, or {@code null} if {@code inputfiles} or
 *         {@code outputdir} is {@code null}, if preparation failed, or if
 *         submission failed
 */
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);

    try {
        FileSystem fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        String[] ifs = inputfiles.split(",");
        if (ifs.length == 0) {
            throw new IOException("No input index file in '" + inputfiles + "'");
        }

        // TreeSet: the file map is emitted in sorted order and without duplicates.
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            try {
                ifdf.open(ifs[i]);
                files.addAll(ifdf.fileInfo().head().getUdi().infos().values());
            } finally {
                closeIndexFile(ifdf);
            }
        }
        if (files.isEmpty()) {
            throw new IOException("Input index files reference no data files: '" + inputfiles + "'");
        }

        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String str : files) {
            if (!first) {
                sb.append(',');
            }
            sb.append(str);
            first = false;
        }
        job.set(ConstVar.HD_index_filemap, sb.toString());

        // The field layout and data file type are taken from the first input file only.
        IFormatDataFile ifdf = new IFormatDataFile(job);
        try {
            ifdf.open(ifs[0]);

            HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
            ArrayList<String> fieldStrings = new ArrayList<String>();

            // NOTE(review): assumes the field map is keyed 0..size-1 without gaps — confirm.
            for (int i = 0; i < map.size(); i++) {
                IRecord.IFType type = map.get(i);
                fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
            }

            job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));
            // NOTE(review): 123456 looks like a reserved header key holding the data file type — confirm.
            job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        } finally {
            closeIndexFile(ifdf);
        }
    } catch (Exception e2) {
        e2.printStackTrace();
        // Do not submit a job whose configuration could not be completed.
        return null;
    }

    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}

/** Closes {@code file}; a close failure is printed, not thrown, so it cannot mask an earlier error. */
private static void closeIndexFile(IFormatDataFile file) {
    try {
        file.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:IndexService.IndexMR.java

License:Open Source License

/**
 * Configures and submits the "IndexMR" job that builds an index over the given
 * data files.
 *
 * <p>The output directory is deleted recursively first. The types of the
 * indexed fields are read from the first input file, and two further fields
 * (a short and an int) are appended to the field map. The number of reducers
 * is derived from the total record count of the inputs: one reducer per
 * started block of 100,000,000 records, and at least one.
 *
 * <p>I/O errors while inspecting the input files are printed and otherwise
 * ignored, so the job is still submitted on a best-effort basis.
 *
 * @param conf2      base configuration the job configuration is created from
 * @param inputfiles comma-separated list of input data files
 * @param column     {@code true} to read the inputs as column files,
 *                   {@code false} to read them as format files
 * @param ids        comma-separated numeric ids of the fields to index
 * @param outputdir  job output directory; deleted recursively before the job is configured
 * @return the submitted job, or {@code null} if {@code inputfiles},
 *         {@code outputdir} or {@code ids} is {@code null}, or if submission failed
 */
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    // ids is dereferenced below (conf.set / split), so it is validated like the other arguments.
    if (inputfiles == null || outputdir == null || ids == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    String[] idxs = ids.split(",");
    // One entry per indexed field plus the two trailing entries filled in below.
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf = null;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(ifdf);
        }
    } else {
        IColumnDataFile icdf = null;
        try {
            icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(icdf);
        }

    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff = null;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                closeQuietly(fff);
            }
        }
    } else {
        // De-duplicate the input names before recording and counting them.
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String str : files) {
            if (!first) {
                sb.append(',');
            }
            sb.append(str);
            first = false;
        }
        conf.set(ConstVar.HD_index_filemap, sb.toString());

        for (String file : files) {
            Path parent = new Path(file).getParent();
            IFormatDataFile fff = null;
            try {
                // Open the first entry of the parent directory whose path contains the given name.
                // If fs could not be obtained above, this fails and is reported per file.
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                closeQuietly(fff);
            }
        }
    }

    // One reducer per started block of 100,000,000 records; yields 1 when no record was counted.
    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}

/** Closes a format data file if it was created; a close failure is printed, not thrown. */
private static void closeQuietly(IFormatDataFile file) {
    if (file == null) {
        return;
    }
    try {
        file.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Closes a column data file if it was created; a close failure is printed, not thrown. */
private static void closeQuietly(IColumnDataFile file) {
    if (file == null) {
        return;
    }
    try {
        file.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:infinidb.hadoop.example.InfiniDBOutputDriver.java

License:Apache License

/**
 * Configures and runs a job that reads through {@code IDBFileInputFormat} and
 * writes {@code NullWritable}/{@code Text} records through
 * {@code InfiniDBOutputFormat}, using one map task and two reduce tasks.
 *
 * @param args command-line arguments; not used
 * @return {@code 0} if the job completed, {@code 1} if running it failed
 * @throws Exception declared by the signature; job failures are reported
 *                   through the return value instead
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // NOTE(review): the jar is located via InfiniDoopDriver, not this driver class — confirm intended.
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    // NOTE(review): hard-coded JDBC URL with user "root" and an empty password; move to configuration.
    DBConfiguration.configureDB(jobconf, "com.mysql.jdbc.Driver", "jdbc:mysql://srvswint4/tpch1", "root", "");
    String[] outFields = { "id", "name" };
    jobconf.setInputFormat(IDBFileInputFormat.class);
    jobconf.setOutputFormat(InfiniDBOutputFormat.class);
    jobconf.setOutputKeyClass(NullWritable.class);
    jobconf.setOutputValueClass(Text.class);
    InfiniDBOutputFormat.setOutput(jobconf, "db", outFields);
    InfiniDBConfiguration idbconf = new InfiniDBConfiguration(jobconf);
    idbconf.setInputPath("input");
    idbconf.setOutputPath("output");
    idbconf.setInfiniDBHome("/usr/local/Calpont");

    jobconf.setMapperClass(InfiniDoopMapper.class);
    jobconf.setNumMapTasks(1);
    jobconf.setNumReduceTasks(2);
    try {
        JobClient.runJob(jobconf);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the caller instead of signalling success.
        return 1;
    }

    return 0;
}

From source file:infinidb.hadoop.example.InfiniDoopDriver.java

License:Apache License

/**
 * Configures and runs a job that reads the columns {@code n_nationkey} and
 * {@code n_name} of the {@code nation} table through
 * {@code InfiniDBInputFormat} and emits {@code LongWritable}/{@code Text}
 * records separated by {@code |}, using four map tasks and one reduce task.
 *
 * @param args command-line arguments; not used
 * @return {@code 0} if the job completed, {@code 1} if running it failed
 * @throws Exception declared by the signature; job failures are reported
 *                   through the return value instead
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    // NOTE(review): hard-coded JDBC URL with user "root" and an empty password; move to configuration.
    DBConfiguration.configureDB(jobconf, "com.mysql.jdbc.Driver", "jdbc:mysql://srvswint4/tpch1", "root", "");
    String[] fields = { "n_nationkey", "n_name" };
    jobconf.setInputFormat(InfiniDBInputFormat.class);

    jobconf.setOutputKeyClass(LongWritable.class);
    jobconf.setOutputValueClass(Text.class);

    InfiniDBInputFormat.setInput(jobconf, InfiniDoopRecord.class, "nation", null, "n_nationkey", fields);

    InfiniDBConfiguration idbconf = new InfiniDBConfiguration(jobconf);
    idbconf.setOutputPath("output2");
    jobconf.setMapperClass(InfiniDoopInputMapper.class);
    jobconf.setNumMapTasks(4);
    jobconf.setNumReduceTasks(1);
    jobconf.set("mapred.textoutputformat.separator", "|");

    try {
        JobClient.runJob(jobconf);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the caller instead of signalling success.
        return 1;
    }

    return 0;
}

From source file:invertedIndex.startJob.java

/**
 * Configures and runs the "wordcount" job with {@code lineIndexMapper} as
 * mapper and {@code lineIndexReducer} as both combiner and reducer.
 *
 * <p>The input path is read from {@code args[1]} and the output path from
 * {@code args[2]}. An existing output directory is deleted recursively before
 * the job runs. Errors are printed and not propagated.
 *
 * @param args argument vector; must hold at least three elements
 */
public static void start(String[] args) {
    if (args == null || args.length < 3) {
        System.err.println("Expected at least 3 arguments (input path at index 1, output path at index 2) but got "
                + (args == null ? 0 : args.length));
        return;
    }

    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Class literals avoid instantiating the mapper/reducer just to ask for their class.
        conf.setMapperClass(lineIndexMapper.class);
        conf.setCombinerClass(lineIndexReducer.class);
        conf.setReducerClass(lineIndexReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        // Delete through the file system that owns the path; the default file
        // system may be a different one and would reject a foreign path.
        outputDir.getFileSystem(conf).delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

    } catch (Exception exp) {
        exp.printStackTrace();
    }
}

From source file:io.aos.t4f.hadoop.mapred.WordCountMapReduceTest.java

License:Apache License

/**
 * Entry point of the word count map/reduce program: parses the command line,
 * configures the job and runs it.
 *
 * <p>Recognised options are {@code -m <maps>} and {@code -r <reduces>}; the two
 * remaining arguments are the input and the output path, in that order.
 *
 * @param args command-line arguments
 * @return {@code 0} once the job has run, otherwise the value returned by
 *         {@code printUsage()} for an invalid command line
 * @throws Exception if the job fails, for instance because of a communication
 *                   problem with the job tracker
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCountMapReduceTest.class);
    conf.setJobName("wordcount");

    // Both the key and the value types describe the job's final output.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(WordCountMapper.class);
    conf.setCombinerClass(WordCountReducer.class);
    conf.setReducerClass(WordCountReducer.class);

    List<String> positional = new ArrayList<String>();
    int pos = 0;
    while (pos < args.length) {
        String token = args[pos];
        boolean mapOption = "-m".equals(token);
        if (mapOption || "-r".equals(token)) {
            // An option must be followed by its value.
            if (pos + 1 >= args.length) {
                System.out.println("ERROR: Required parameter missing from " + token);
                return printUsage();
            }
            String rawCount = args[++pos];
            int count;
            try {
                count = Integer.parseInt(rawCount);
            } catch (NumberFormatException badNumber) {
                System.out.println("ERROR: Integer expected instead of " + rawCount);
                return printUsage();
            }
            if (mapOption) {
                conf.setNumMapTasks(count);
            } else {
                conf.setNumReduceTasks(count);
            }
        } else {
            positional.add(token);
        }
        pos++;
    }

    // Exactly two positional arguments are required: input and output.
    int positionalCount = positional.size();
    if (positionalCount != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + positionalCount + " instead of 2.");
        return printUsage();
    }

    String inputPaths = positional.get(0);
    Path outputPath = new Path(positional.get(1));
    FileInputFormat.setInputPaths(conf, inputPaths);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    return 0;
}