Example usage for org.apache.hadoop.mapred JobConf set

List of usage examples for org.apache.hadoop.mapred JobConf set

Introduction

On this page you can find usage examples for org.apache.hadoop.mapred.JobConf.set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
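
Before the real-world examples below, here is a minimal, self-contained sketch of the call itself. The property name and value are hypothetical placeholders chosen for illustration; they are not taken from any of the projects quoted on this page.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        // Build a JobConf; the class argument is only used to locate the containing jar.
        JobConf jobConf = new JobConf(JobConfSetExample.class);

        // set(String name, String value) stores the value under the given property name.
        jobConf.set("example.property.name", "example-value");

        // JobConf extends Configuration, so the value can be read back with get().
        System.out.println(jobConf.get("example.property.name"));
    }
}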

Usage

From source file:gaffer.operation.simple.hdfs.handler.job.factory.AbstractAddElementsFromHdfsJobFactory.java

License:Apache License

protected void setupJobConf(final JobConf jobConf, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    LOGGER.info("Setting up job conf");
    jobConf.set(SCHEMA, new String(store.getSchema().toJson(false), CommonConstants.UTF_8));
    LOGGER.info("Added {} {} to job conf", SCHEMA,
            new String(store.getSchema().toJson(false), CommonConstants.UTF_8));
    jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
    LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
    jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
    LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());
    Integer numTasks = operation.getNumMapTasks();
    if (null != numTasks) {
        jobConf.setNumMapTasks(numTasks);
        LOGGER.info("Set number of map tasks to {} on job conf", numTasks);
    }
    numTasks = operation.getNumReduceTasks();
    if (null != numTasks) {
        jobConf.setNumReduceTasks(numTasks);
        LOGGER.info("Set number of reduce tasks to {} on job conf", numTasks);
    }
}

From source file:gobblin.source.extractor.hadoop.OldApiHadoopFileInputSource.java

License:Apache License

@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    JobConf jobConf = new JobConf(new Configuration());
    for (String key : state.getPropertyNames()) {
        jobConf.set(key, state.getProp(key));
    }

    if (state.contains(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
        for (String inputPath : state.getPropAsList(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
            FileInputFormat.addInputPath(jobConf, new Path(inputPath));
        }
    }

    try {
        FileInputFormat<K, V> fileInputFormat = getFileInputFormat(state, jobConf);
        InputSplit[] fileSplits = fileInputFormat.getSplits(jobConf,
                state.getPropAsInt(HadoopFileInputSource.FILE_SPLITS_DESIRED_KEY,
                        HadoopFileInputSource.DEFAULT_FILE_SPLITS_DESIRED));
        if (fileSplits == null || fileSplits.length == 0) {
            return ImmutableList.of();
        }

        Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)
                ? Extract.TableType
                        .valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase())
                : null;
        String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
        String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);

        List<WorkUnit> workUnits = Lists.newArrayListWithCapacity(fileSplits.length);
        for (InputSplit inputSplit : fileSplits) {
            // Create one WorkUnit per InputSplit
            FileSplit fileSplit = (FileSplit) inputSplit;
            Extract extract = createExtract(tableType, tableNamespace, tableName);
            WorkUnit workUnit = WorkUnit.create(extract);
            workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY,
                    HadoopUtils.serializeToString(fileSplit));
            workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
            workUnits.add(workUnit);
        }

        return workUnits;
    } catch (IOException ioe) {
        throw new RuntimeException("Failed to get workunits", ioe);
    }
}

From source file:gobblin.source.extractor.hadoop.OldApiHadoopFileInputSource.java

License:Apache License

@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
    if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) {
        throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
    }

    JobConf jobConf = new JobConf(new Configuration());
    for (String key : workUnitState.getPropertyNames()) {
        jobConf.set(key, workUnitState.getProp(key));
    }

    String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY);
    FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf);
    RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
    boolean readKeys = workUnitState.getPropAsBoolean(HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY,
            HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS);
    return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
}

From source file:gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.java

public static void main(String[] args) {
    //paths and input handling; validate arguments before they are used
    if (args.length < 2 || args.length > 3) {
        System.err.println(
                "Incorrect input. Example usage: hadoop jar ~/GroupRecs/MultiUser.jar inputPath outputPath [numReduceTasks]");
        return;
    }

    Path inputRatingsPath = new Path(args[0]); //movieid, userid, rating (text files)
    Path job1OutputPath = new Path("/user/hduser/partialResults");
    Path partialDistancesPath = new Path("/user/hduser/partialResults/part-*"); //member_nonMember \t partialDistance (sequence files)
    Path candidateMoviesPath = new Path("/user/hduser/partialResults/candidateMovies-*"); //candidateMovieId, nonMemberUserId_rating (text files)
    Path userSimilaritiesPath = new Path("/user/hduser/userSimilarities"); //similarity of each group member to his friends (text files)
    Path finalScoresPath = new Path(args[1]); //movieId \t outputScore

    int numReduceTasks = 56; //default value
    if (args.length == 3) {
        numReduceTasks = Integer.parseInt(args[2]);
    }

    final float friendsSimThresh = 0.8f;

    String groupFilePath = "/user/hduser/group.txt"; //one-line csv file with user ids (text file)

    //JOB 1//
    JobClient client = new JobClient();
    JobConf conf = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(job1OutputPath)) {
            fs.delete(job1OutputPath, true);
        }
        if (fs.exists(userSimilaritiesPath)) {
            fs.delete(userSimilaritiesPath, true);
        }
        if (fs.exists(finalScoresPath)) {
            fs.delete(finalScoresPath, true);
        }
    } catch (IOException ex) {
        Logger.getLogger(MultiUserMain.class.getName()).log(Level.SEVERE, null, ex);
    }

    conf.setJobName("Multi-user approach - Job 1");
    System.out.println("Starting Job 1 (Multi-user approach)...");

    conf.setMapOutputKeyClass(VIntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ByteWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    //conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf, inputRatingsPath); //user ratings
    FileOutputFormat.setOutputPath(conf, job1OutputPath); //partial distances

    MultipleOutputs.addNamedOutput(conf, "candidateMovies", SequenceFileOutputFormat.class, VIntWritable.class,
            Text.class); //movieId, userId_rating

    conf.setMapperClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Mapper.class);
    conf.setReducerClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Reducer.class);

    conf.setNumReduceTasks(numReduceTasks);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf); // group file
    } catch (URISyntaxException e1) {
        System.err.println(e1.toString());
    }

    conf.setInt("mapred.task.timeout", 6000000);

    client.setConf(conf);
    RunningJob job;
    try {
        job = JobClient.runJob(conf);
        job.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 2//
    System.out.println("Starting Job 2 (Multi-user approach)...");
    JobClient client2 = new JobClient();
    JobConf conf2 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    conf2.setJobName("Multi-user approach - Job 2");

    conf2.setMapOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends
    conf2.setMapOutputValueClass(ByteWritable.class);//similarity part unsquared

    conf2.setOutputKeyClass(Text.class); //user pair (member_nonMember), where nonMember is in friends
    conf2.setOutputValueClass(DoubleWritable.class);//similarity

    conf2.setInputFormat(SequenceFileInputFormat.class);
    //conf2.setInputFormat(TextInputFormat.class);
    conf2.setOutputFormat(TextOutputFormat.class);
    //conf2.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf2, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf2, partialDistancesPath); //Job 1 output
    FileOutputFormat.setOutputPath(conf2, userSimilaritiesPath); //Job 2 output (similarity of each group member to his friends)

    conf2.setMapperClass(IdentityMapper.class);
    conf2.setReducerClass(gr.forth.ics.isl.grouprecsmr.job2.Job2ReducerMulti.class);

    int numSimilaritiesPartitions = numReduceTasks;
    conf2.setNumReduceTasks(numSimilaritiesPartitions);

    conf2.setFloat("friendsSimThreshold", friendsSimThresh);

    conf2.setInt("mapred.task.timeout", 6000000);
    conf2.set("io.sort.mb", "500");

    client2.setConf(conf2);
    RunningJob job2;
    try {
        job2 = JobClient.runJob(conf2);
        job2.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 3//
    System.out.println("Starting Job 3 (Multi-user approach)...");
    JobClient client3 = new JobClient();
    JobConf conf3 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    conf3.setJobName("Multi-user approach - Job 3");

    conf3.setMapOutputKeyClass(VIntWritable.class);
    conf3.setMapOutputValueClass(Text.class);

    conf3.setOutputKeyClass(VIntWritable.class);
    conf3.setOutputValueClass(DoubleWritable.class);

    conf3.setInputFormat(SequenceFileInputFormat.class);
    //conf3.setInputFormat(TextInputFormat.class);
    conf3.setOutputFormat(TextOutputFormat.class);
    //conf3.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf3,SequenceFile.CompressionType.BLOCK);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf3);
    } catch (URISyntaxException ex) {
        System.err.println("Could not add group file to distributed cache. " + ex);
    }
    for (int i = 0; i < numSimilaritiesPartitions; i++) {
        String reduceId = String.format("%05d", i); //5-digit int with leading zeros
        try {
            DistributedCache.addCacheFile(new URI(userSimilaritiesPath.toString() + "/part-" + reduceId),
                    conf3);
        } catch (URISyntaxException ex) {
            System.err.println("Could not add similarities files to distributed cache. " + ex);
        }

    }

    FileInputFormat.setInputPaths(conf3, candidateMoviesPath); //Job 1 output (candidate movies)
    FileOutputFormat.setOutputPath(conf3, finalScoresPath); //Job 3 output (movie \t outputScore)

    //        conf3.setMapperClass(IdentityMapper.class);      
    conf3.setMapperClass(gr.forth.ics.isl.grouprecsmr.job3.Job3MapperMulti.class); //filtering out ratings from non-Friends
    conf3.setReducerClass(gr.forth.ics.isl.grouprecsmr.job3.Job3ReducerMulti.class);

    conf3.setInt("mapred.task.timeout", 6000000);
    conf3.set("io.sort.mb", "500");

    conf3.setNumReduceTasks(numReduceTasks);

    client3.setConf(conf3);
    RunningJob job3;
    try {
        job3 = JobClient.runJob(conf3);
        job3.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }
}

From source file:hadoop.UIUCWikifierAppHadoop.java

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobConf job = new JobConf(conf, UIUCWikifierAppHadoop.class);

    //      System.out.println("Run.. Envinronment Variables");
    //      java.util.Map<String,String> env = System.getenv();
    //
    //      System.out.println("Printing environment variables");
    //      for(String k : env.keySet()){
    //         System.out.println(k + "\t" + env.get(k));
    //      }
    //      String jlpValue = System.getProperty("java.library.path");
    //      System.out.println("java.library.path=" + jlpValue);
    //      System.setProperty("java.library.path", jlpValue + ":" + "/home/jgilme1/bin/gurobi550/linux64/lib");

    //process command line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    //change current working directory to hdfs path..
    job.setJobName("entitylinker");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(DistributeInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(0);
    job.setNumMapTasks(Integer.parseInt(args[2]));
    job.set("mapreduce.input.fileinputformat.split.minsize", "0");
    job.set("mapred.child.java.opts", "-Xmx16g");
    job.setNumTasksToExecutePerJvm(-1);
    //job.setMemoryForMapTask(new Long(12288));
    //job.set(JobConf.MAPRED_MAP_TASK_ULIMIT, "12582912");

    String gurobiHomeVariable = "GUROBI_HOME";
    String gurobiHomeValue = "/home/jgilme1/bin/gurobi560/linux64";
    String pathVariable = "PATH";
    String newPathValue = gurobiHomeValue + "/bin";
    String ldLibraryPathVariable = "LD_LIBRARY_PATH";
    String ldLibraryPathValue = gurobiHomeValue + "/lib";
    String grbLicenseFileVariable = "GRB_LICENSE_FILE";
    String grbLicenseFileValue = "/scratch6/usr/jgilme1/gurobiLicense/gurobi.lic";

    StringBuilder newEnvironment = new StringBuilder();
    newEnvironment.append(gurobiHomeVariable);
    newEnvironment.append("=");
    newEnvironment.append(gurobiHomeValue);
    newEnvironment.append(",");
    newEnvironment.append(pathVariable);
    newEnvironment.append("=");
    newEnvironment.append("$" + pathVariable + ":");
    newEnvironment.append(newPathValue);
    newEnvironment.append(",");
    newEnvironment.append(ldLibraryPathVariable);
    newEnvironment.append("=$" + ldLibraryPathVariable + ":");
    newEnvironment.append(ldLibraryPathValue);
    newEnvironment.append(",");
    newEnvironment.append(grbLicenseFileVariable);
    newEnvironment.append("=");
    newEnvironment.append(grbLicenseFileValue);

    //System.out.println(newEnvironment.toString());
    job.set(JobConf.MAPRED_MAP_TASK_ENV, newEnvironment.toString());

    DistributedCache.addCacheArchive(new URI("/user/jgilme1/entitylinking/Wikifier2013.tar.gz"), job);

    JobClient.runJob(job);
    return 0;
}

From source file:hibench.HiveDataGenerator.java

License:Apache License

private void createRankingsTable() throws IOException {

    LOG.info("Creating table rankings...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " rankings";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setCombinerClass(ConcatTextCombiner.class);
    job.setReducerClass(CountRankingAndReplaceIdReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    //      job.setNumReduceTasks(options.agents/2);

    /***
     * need to join the result with the LINK table in order to replace
     * url ids with real contents
     */
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.T_LINK_PAGE), TextInputFormat.class,
            MyIdentityMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");

        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.RANKINGS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page file " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Ouput file " + paths.getResult(DataPaths.RANKINGS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.RANKINGS));
}

From source file:hibench.HiveDataGenerator.java

License:Apache License

private void createUserVisitsTable() throws IOException, URISyntaxException {

    LOG.info("Creating user visits...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " uservisits";
    job.setJobName(jobname);

    /***
     * Set distributed cache file for table generation,
     * cache files include:
     * 1. user agents
     * 2. country code and language code
     * 3. search keys
     */

    DistributedCache.addCacheFile(paths.getPath(DataPaths.uagentf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.countryf).toUri(), job);
    DistributedCache.addCacheFile(paths.getPath(DataPaths.searchkeyf).toUri(), job);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    visit.setJobConf(job);

    job.setInputFormat(TextInputFormat.class);

    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.DUMMY), NLineInputFormat.class,
            CreateRandomAccessMapper.class);
    MultipleInputs.addInputPath(job, paths.getPath(DataPaths.LINKS), TextInputFormat.class,
            TagRecordsMapper.class);

    job.setCombinerClass(CreateUserVisitsCombiner.class);
    job.setReducerClass(CreateUserVisitsReducer.class);

    if (options.reds > 0) {
        job.setNumReduceTasks(options.reds);
    } else {
        job.setNumReduceTasks(DataOptions.getMaxNumReduce());
    }

    //      job.setNumReduceTasks(options.agents/2);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.USERVISITS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file " + paths.getPath(DataPaths.DUMMY) + " as input");
    LOG.info("Links file " + paths.getResult(DataPaths.LINKS) + " as output");
    LOG.info("Ouput file " + paths.getResult(DataPaths.USERVISITS));
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.USERVISITS));
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

private void createPageRankNodes() throws IOException {

    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /***
         * Balance the output order of nodes to protect the pagerank
         * benchmark run from potential data skew
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);
        //         job.setPartitionerClass(ModulusPartitioner.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);

        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

/***
 * Create pagerank edge table, output link A->B as <A, B> pairs
 * @throws IOException
 */
private void createPageRankLinks() throws IOException {

    LOG.info("Creating PageRank links", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank links";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.T_LINK_PAGE));
    job.setInputFormat(TextInputFormat.class);

    job.setMapperClass(OutputLinkEdgesMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.EDGES));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Edges file " + paths.getResult(DataPaths.EDGES) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.EDGES));
}

From source file:hitune.analysis.mapreduce.CSVFileOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
        throws IOException {

    job.set("mapred.textoutputformat.separator", SEPEARATOR_COMMA);
    String filename = job.get(AnalysisProcessorConfiguration.outputfilename);
    if (filename == null || filename.equals("")) {
        filename = name;
    } else {
        filename += "-" + name;
        //FileOutputFormat.setWorkOutputPath(job,new Path(job.get("mapred.output.dir")+"/" + filename));
    }

    return new CSVFileWriter(super.getRecordWriter(fs, job, filename, progress));
}