Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks.

Prototype

public void setNumReduceTasks(int n) 

Source Link

Document

Set the requisite number of reduce tasks for this job.

Usage

From source file:org.apache.oozie.action.hadoop.LauncherMainTester.java

License:Apache License

private static JobConf createSleepMapperReducerJobConf() {
    JobConf jConf = new JobConf(true);
    jConf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
    jConf.setMapperClass(SleepMapperReducerForTest.class);
    jConf.setReducerClass(SleepMapperReducerForTest.class);
    jConf.setOutputKeyClass(Text.class);
    jConf.setOutputValueClass(IntWritable.class);
    jConf.setInputFormat(TextInputFormat.class);
    jConf.setOutputFormat(TextOutputFormat.class);
    jConf.setNumReduceTasks(1);
    jConf.set(SleepMapperReducerForTest.SLEEP_TIME_MILLIS_KEY, "60000");
    return jConf;
}

From source file:org.apache.oozie.action.hadoop.LauncherMapperHelper.java

License:Apache License

public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
        String recoveryId, Configuration actionConf, String prepareXML)
        throws IOException, HadoopAccessorException {

    launcherConf.setMapperClass(LauncherMapper.class);
    launcherConf.setSpeculativeExecution(false);
    launcherConf.setNumMapTasks(1);/*  w  w w .  j  a  v  a 2  s  . com*/
    launcherConf.setNumReduceTasks(0);

    launcherConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);
    launcherConf.set(LauncherMapper.OOZIE_ACTION_DIR_PATH, actionDir.toString());
    launcherConf.set(LauncherMapper.OOZIE_ACTION_RECOVERY_ID, recoveryId);
    launcherConf.set(LauncherMapper.ACTION_PREPARE_XML, prepareXML);

    actionConf.set(LauncherMapper.OOZIE_JOB_ID, jobId);
    actionConf.set(LauncherMapper.OOZIE_ACTION_ID, actionId);

    if (Services.get().getConf().getBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache",
            false)) {
        List<String> purgedEntries = new ArrayList<String>();
        Collection<String> entries = actionConf.getStringCollection("mapreduce.job.cache.files");
        for (String entry : entries) {
            if (entry.contains("#")) {
                purgedEntries.add(entry);
            }
        }
        actionConf.setStrings("mapreduce.job.cache.files",
                purgedEntries.toArray(new String[purgedEntries.size()]));
        launcherConf.setBoolean("oozie.hadoop-2.0.2-alpha.workaround.for.distributed.cache", true);
    }

    FileSystem fs = Services.get().get(HadoopAccessorService.class)
            .createFileSystem(launcherConf.get("user.name"), actionDir.toUri(), launcherConf);
    fs.mkdirs(actionDir);

    OutputStream os = fs.create(new Path(actionDir, LauncherMapper.ACTION_CONF_XML));
    try {
        actionConf.writeXml(os);
    } finally {
        IOUtils.closeSafely(os);
    }

    launcherConf.setInputFormat(OozieLauncherInputFormat.class);
    launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java

License:Apache License

/**
 * Submit a Pig job to hadoop./*from ww w.j a va2s .  c  o m*/
 * 
 * @param mapFuncs
 *            a list of map functions to apply to the inputs. The cardinality of the list should
 *            be the same as input's cardinality.
 * @param groupFuncs
 *            a list of grouping functions to apply to the inputs. The cardinality of the list
 *            should be the same as input's cardinality.
 * @param reduceFunc
 *            the reduce function.
 * @param mapTasks
 *            the number of map tasks to use.
 * @param reduceTasks
 *            the number of reduce tasks to use.
 * @param input
 *            a list of inputs
 * @param output
 *            the path of the output.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    ConfigurationValidator.validatePigProperties(properties);
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    conf.setJobName(jobName);
    boolean success = false;
    List<String> funcs = new ArrayList<String>();

    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
            funcs.addAll(es.getFuncs());
    }
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
            funcs.addAll(es.getFuncs());
    }
    if (pom.toReduce != null) {
        funcs.addAll(pom.toReduce.getFuncs());
    }

    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        conf.setJar(submitJarFile.getPath());
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");

        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));

        if (pom.reduceParallelism != -1) {
            conf.setNumReduceTasks(pom.reduceParallelism);
        }
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        }
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that combiner is only called once
            // since we can't handle no combine or multiple combines
            conf.setCombineOnceOnly(true);
        }
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        }
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        }
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        }
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        }
        conf.setMapRunnerClass(PigMapReduce.class);
        if (pom.toCombine != null) {
            conf.setCombinerClass(PigCombine.class);
            //conf.setCombinerClass(PigMapReduce.class);
        }
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
            conf.setOutputKeyComparatorClass(PigWritableComparator.class);
        }
        if (pom.partitionFunction != null) {
            conf.setPartitionerClass(SortPartitioner.class);
        }
        conf.setReducerClass(PigMapReduce.class);
        conf.setInputFormat(PigInputFormat.class);
        conf.setOutputFormat(PigOutputFormat.class);
        // not used starting with 0.15 conf.setInputKeyClass(Text.class);
        // not used starting with 0.15 conf.setInputValueClass(Tuple.class);
        conf.setOutputKeyClass(Tuple.class);
        if (pom.userComparator != null) {
            conf.setOutputKeyComparatorClass(pom.userComparator);
        }
        conf.setOutputValueClass(IndexedTuple.class);
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));

        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));

        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);

        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());
        }

        //
        // Now, actually submit the job (using the submit name)
        //
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());

        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            try {
                Thread.sleep(sleepTime);
            } catch (Exception e) {
            }

            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                    sb.append("successfully");
                    log.debug(sb.toString());
                }
                if (success) {
                    mrJobNumber++;
                }
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
                break;
            } else // still running
            {
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                        sbProgress.append("%");
                        log.debug(sbProgress.toString());
                    }
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                }
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
            }
        }

        // bug 1030028: if the input file is empty; hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
        }

        try {
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs()))
                    .asElement(outputFile.toString());

            if (success && !descriptor.exists()) {

                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
            }
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
        }

        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            long timeSpent = 0;

            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            totalHadoopTimeSpent += timeSpent;
        }
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        e.printStackTrace();
        throw WrappedIOException.wrap(e);
    } finally {
        submitJarFile.delete();
    }
    return success;
}

From source file:org.apache.pig.test.pigmix.mapreduce.L1.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);//w  ww. j ava  2  s. c om
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    JobConf lp = new JobConf(L1.class);
    lp.setJobName("L1 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(IntWritable.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Group.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L1out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L1 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L10.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);//from   w  w  w . j  a va2  s  .  c om
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    JobConf lp = new JobConf(L10.class);
    lp.setJobName("L10 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(MyType.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setReducerClass(Group.class);
    lp.setPartitionerClass(MyPartitioner.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out"));
    // Hardcode the parallel to 40 since MyPartitioner assumes it
    lp.setNumReduceTasks(40);
    Job group = new Job(lp);

    JobControl jc = new JobControl("L10 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L11.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);/*from w  w  w .j  a va  2 s. c o  m*/
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L11.class);
    lp.setJobName("L11 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(ReadPageViews.class);
    lp.setReducerClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/p"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L11.class);
    lu.setJobName("L11 Load Widerow");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadWideRow.class);
    lu.setCombinerClass(ReadWideRow.class);
    lu.setReducerClass(ReadWideRow.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/widerow"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/wr"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadWideRow = new Job(lu);

    JobConf join = new JobConf(L11.class);
    join.setJobName("L11 Union WideRow and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setCombinerClass(Union.class);
    join.setReducerClass(Union.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/p"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/wr"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L11out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadWideRow);

    JobControl jc = new JobControl("L11 join");
    jc.addJob(loadPages);
    jc.addJob(loadWideRow);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L12.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);/*from   www  .j  av a  2 s  .co  m*/
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L12.class);
    lp.setJobName("L12 Find Highest Value Page Per User");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(DoubleWritable.class);
    lp.setMapperClass(HighestValuePagePerUser.class);
    lp.setCombinerClass(HighestValuePagePerUser.class);
    lp.setReducerClass(HighestValuePagePerUser.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/highest_value_page_per_user"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L12.class);
    lu.setJobName("L12 Find Total Timespent per Term");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(LongWritable.class);
    lu.setMapperClass(TotalTimespentPerTerm.class);
    lu.setCombinerClass(TotalTimespentPerTerm.class);
    lu.setReducerClass(TotalTimespentPerTerm.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/total_timespent_per_term"));
    lu.setNumReduceTasks(Integer.parseInt(parallel));
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L12.class);
    join.setJobName("L12 Find Queries Per Action");
    join.setInputFormat(TextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(LongWritable.class);
    join.setMapperClass(QueriesPerAction.class);
    join.setCombinerClass(QueriesPerAction.class);
    join.setReducerClass(QueriesPerAction.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/queries_per_action"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);

    JobControl jc = new JobControl("L12 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L13.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);//from  www . j a  va2  s  . co  m
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L13.class);
    lp.setJobName("L13 Load Left Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadLeftPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_left_pages"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L13.class);
    lu.setJobName("L13 Load Right Page Views");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadRightPageViews.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/power_users_samples"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_right_pages"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L13.class);
    join.setJobName("L13 Join Two Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_left_pages"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_right_pages"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L13out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L13 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L14.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);//w  w  w .  j  ava2  s  .  c o  m
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    String user = System.getProperty("user.name");
    JobConf lp = new JobConf(L14.class);
    lp.setJobName("L14 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views_sorted"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/indexed_pages_14"));
    lp.setNumReduceTasks(0);
    Job loadPages = new Job(lp);

    JobConf lu = new JobConf(L14.class);
    lu.setJobName("L14 Load Users");
    lu.setInputFormat(TextInputFormat.class);
    lu.setOutputKeyClass(Text.class);
    lu.setOutputValueClass(Text.class);
    lu.setMapperClass(ReadUsers.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lu.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lu, new Path(inputDir + "/users_sorted"));
    FileOutputFormat.setOutputPath(lu, new Path(outputDir + "/indexed_users_14"));
    lu.setNumReduceTasks(0);
    Job loadUsers = new Job(lu);

    JobConf join = new JobConf(L14.class);
    join.setJobName("L14 Join Users and Pages");
    join.setInputFormat(KeyValueTextInputFormat.class);
    join.setOutputKeyClass(Text.class);
    join.setOutputValueClass(Text.class);
    join.setMapperClass(IdentityMapper.class);
    join.setReducerClass(Join.class);
    props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        join.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_pages_14"));
    FileInputFormat.addInputPath(join, new Path(outputDir + "/indexed_users_14"));
    FileOutputFormat.setOutputPath(join, new Path(outputDir + "/L14out"));
    join.setNumReduceTasks(Integer.parseInt(parallel));
    Job joinJob = new Job(join);
    joinJob.addDependingJob(loadPages);
    joinJob.addDependingJob(loadUsers);

    JobControl jc = new JobControl("L14 join");
    jc.addJob(loadPages);
    jc.addJob(loadUsers);
    jc.addJob(joinJob);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}

From source file:org.apache.pig.test.pigmix.mapreduce.L15.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Parameters: inputDir outputDir parallel");
        System.exit(1);/*from  w  w w.j  a  v a  2 s  .c  o m*/
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2];
    JobConf lp = new JobConf(L15.class);
    lp.setJobName("L15 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Combiner.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L15out"));
    lp.setNumReduceTasks(Integer.parseInt(parallel));
    Job group = new Job(lp);

    JobControl jc = new JobControl("L15 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
        ArrayList<Job> failures = jc.getFailedJobs();
        if (failures != null && failures.size() > 0) {
            for (Job failure : failures) {
                System.err.println(failure.getMessage());
            }
            break;
        }

        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
        }

        if (i % 10000 == 0) {
            System.out.println("Running jobs");
            ArrayList<Job> running = jc.getRunningJobs();
            if (running != null && running.size() > 0) {
                for (Job r : running) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Ready jobs");
            ArrayList<Job> ready = jc.getReadyJobs();
            if (ready != null && ready.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Waiting jobs");
            ArrayList<Job> waiting = jc.getWaitingJobs();
            if (waiting != null && waiting.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
            System.out.println("Successful jobs");
            ArrayList<Job> success = jc.getSuccessfulJobs();
            if (success != null && success.size() > 0) {
                for (Job r : ready) {
                    System.out.println(r.getJobName());
                }
            }
        }
        i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
            System.err.println(failure.getMessage());
        }
    }
    jc.stop();
}