Example usage for org.apache.hadoop.mapred JobConf setInputFormat

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setInputFormat.

Prototype

public void setInputFormat(Class<? extends InputFormat> theClass) 

Document

Set the InputFormat implementation for the map-reduce job.
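
setInputFormat is called while configuring the job, before submission. The sketch below is a minimal, self-contained map-only job that selects TextInputFormat this way; the class name and the identity-mapper wiring are illustrative assumptions, not taken from the examples that follow.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class SetInputFormatExample {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(SetInputFormatExample.class);
        job.setJobName(SetInputFormatExample.class.getSimpleName());

        // TextInputFormat hands each input line to the mapper as a
        // (LongWritable byte offset, Text line) pair.
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(IdentityMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setNumReduceTasks(0); // map-only: mapper output is written directly

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}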

Usage

From source file: edu.uci.ics.pregelix.example.utils.FilterCount.java

License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);

    job.setJobName(FilterCount.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));
    job.setNumReduceTasks(0);
    JobClient.runJob(job);
}

From source file: edu.uci.ics.pregelix.example.utils.FindLargest.java

License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);

    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setCombinerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setInputFormat(TextInputFormat.class);
    for (int i = 0; i < args.length - 2; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2]));
    job.setNumReduceTasks(Integer.parseInt(args[args.length - 1]));
    JobClient.runJob(job);
}

From source file: edu.uci.ics.pregelix.example.utils.GraphPreProcessor.java

License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(GraphPreProcessor.class);

    job.setJobName(GraphPreProcessor.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(0);
    JobClient.runJob(job);
}

From source file: edu.uci.ics.pregelix.example.utils.VertexAggregator.java

License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexAggregator.class);

    job.setJobName(VertexAggregator.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setCombinerClass(CombineRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}

From source file: edu.uci.ics.pregelix.example.utils.VertexSorter.java

License: Apache License

public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexSorter.class);

    job.setJobName(VertexSorter.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}

From source file: edu.ucsb.cs.hybrid.HybridDriver.java

License: Apache License

public static void main(String args[]) throws ParseException, IOException {

    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");

    JobConf job = new JobConf();
    job.setJarByClass(HybridDriver.class);
    new GenericOptionsParser(job, args);
    setMapperAndRunner(job);
    job.setMapOutputKeyClass(DocDocWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    Path inputPath = new Path(INPUT_DIR);
    CustomSequenceFileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(OUTPUT_DIR);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    job.setBoolean("fs.hdfs.impl.disable.cache", true); // unclear whether this is needed

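    // Pick the input format based on whether partitions may be split: when
    // splitting is allowed, the min/max split sizes and the DFS block size
    // are pinned to one value so each map task reads a fixed-size chunk.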
    if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) {
        job.setInputFormat(CustomSequenceFileInputFormat.class);
        Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024;
        job.setLong("mapred.min.split.size", splitMB);
        job.setLong("mapred.max.split.size", splitMB);
        job.setLong("dfs.block.size", splitMB);
    } else {
        // For the WWW experiments it is assumed that partitions are not split
        // for load balancing; this splitter configuration should be revisited.
        Splitter.configure(job, inputPath);
        job.setInputFormat(NonSplitableSequenceInputFormat.class);
    }
    // SIGIR'14 two-stage balancing (not yet fully incorporated)
    if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) {
        TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE),
                new Path(PartDriver.OUTPUT_DIR), job);
    }
    JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
    if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE))
        IDMappingJob(args);
}

From source file: edu.ucsb.cs.hybrid.HybridDriver.java

License: Apache License

public static void IDMappingJob(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(HybridDriver.class);
    job.setJobName("Converting binary similarity scores to text");
    job.setMapperClass(IDMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Path inputPath = new Path(OUTPUT_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    Path outputPath = new Path("SimilarityScores");
    job.setOutputFormat(TextOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);
    HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // unclear whether this is needed
    JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}

From source file: edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java

License: Apache License

public static void main(JobConf job) throws IOException {
    int nBits/*D*/, nFeatures/*K*/, nReducers;
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE);
    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    if (fs.exists(inputPath))
        fs.delete(inputPath, true);

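    // Seed the job input with one (i, i) record per reducer; the identity
    // mapper then delivers exactly one key to each of the nReducers reduce tasks.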
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    for (int i = 0; i < nReducers; i++)
        writer.append(new IntWritable(i), new IntWritable(i));
    writer.close();

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}

From source file: edu.ucsb.cs.lsh.projection.SignaturesGenerator.java

License: Apache License

public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SignaturesGenerator.class);
    new GenericOptionsParser(job, args);
    job.setJobName(SignaturesGenerator.class.getSimpleName());
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    setParameters();
    FileSystem fs = FileSystem.get(job);
    prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR));
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath);

    FileInputFormat.setInputPaths(job, INPUT_DIR);
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setInt("mapred.task.timeout", 6000000);

    job.setMapperClass(SigMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BitSignature.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    JobSubmitter.run(job, "LSH", -1);
}

From source file: edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java

License: Apache License

public static JobConf setInputOutput(JobConf job, Path inputPath, Path outputPath) throws IOException {
    job.setInputFormat(NonSplitableSequenceInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, inputPath);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(MultiSeqOutput.class);
    MultiSeqOutput.setOutputPath(job, outputPath);
    return job;
}
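
Since setInputOutput returns the JobConf it configured, a calling driver can pass the result straight to job submission. A minimal sketch (the driver lines and paths below are hypothetical, not part of the source file above):

JobConf job = new JobConf(CosinePartitioning.class);
job = setInputOutput(job, new Path("partitions-input"), new Path("partitions-output"));
JobClient.runJob(job);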