Usage examples for org.apache.hadoop.mapred.RunningJob#isSuccessful()
public boolean isSuccessful() throws IOException;
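Before the project-specific examples below, here is a minimal sketch of the common pattern they all share: submit a job with the blocking JobClient.runJob(), then turn the outcome reported by isSuccessful() into an exit code. This is only an illustration, not code from any of the projects listed; IsSuccessfulExample is a hypothetical class name and the job is a trivial identity pass-through.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class IsSuccessfulExample {
    public static void main(String[] args) throws IOException {
        // A trivial identity job: the classic mapred API falls back to
        // IdentityMapper/IdentityReducer when no mapper/reducer is set.
        JobConf conf = new JobConf(IsSuccessfulExample.class);
        conf.setJobName("is-successful-example");
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // runJob() blocks until the job completes; afterwards the returned
        // RunningJob can be queried for the final outcome.
        RunningJob job = JobClient.runJob(conf);
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}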
From source file:org.archive.jbs.Merge.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("jbs.Merge <output> <input>...");
        return 1;
    }

    JobConf conf = new JobConf(getConf(), Merge.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    // Choose the outputformat to either merge or index the records
    //
    // org.archive.jbs.lucene.LuceneOutputFormat
    //    - builds local Lucene index
    //
    // org.archive.jbs.solr.SolrOutputFormat
    //    - sends documents to remote Solr server
    //
    // org.apache.hadoop.mapred.MapFileOutputFormat
    //    - writes merged documents to Hadoop MapFile
    conf.setOutputFormat((Class) Class
            .forName(conf.get("jbs.outputformat.class", "org.apache.hadoop.mapred.MapFileOutputFormat")));

    // Set the Hadoop job name to incorporate the output format name.
    String formatName = conf.getOutputFormat().getClass().getName();
    conf.setJobName("jbs.Merge " + formatName.substring(
            formatName.lastIndexOf('.') != -1 ? (formatName.lastIndexOf('.') + 1) : 0));

    // Add the input paths as either NutchWAX segment directories or
    // text .dup files.
    for (int i = 1; i < args.length; i++) {
        Path p = new Path(args[i]);

        // Expand any file globs and then check each matching path
        FileStatus[] files = FileSystem.get(conf).globStatus(p);

        for (FileStatus file : files) {
            if (file.isDir()) {
                // If it's a directory, then check if it is a Nutch segment, otherwise treat as a SequenceFile.
                if (p.getFileSystem(conf).exists(new Path(file.getPath(), "parse_data"))) {
                    LOG.info("Input NutchWax: " + file.getPath());
                    MultipleInputs.addInputPath(conf, new Path(file.getPath(), "parse_data"),
                            SequenceFileInputFormat.class, NutchMapper.class);
                    MultipleInputs.addInputPath(conf, new Path(file.getPath(), "parse_text"),
                            SequenceFileInputFormat.class, NutchMapper.class);
                } else {
                    // Assume it's a SequenceFile of JSON-encoded Documents.
                    LOG.info("Input Document: " + file.getPath());
                    MultipleInputs.addInputPath(conf, file.getPath(),
                            SequenceFileInputFormat.class, DocumentMapper.class);
                }
            } else {
                // Not a directory, assume it's a text file, either CDX or property specifications.
                LOG.info("Input TextFile: " + file.getPath());
                MultipleInputs.addInputPath(conf, file.getPath(), TextInputFormat.class, TextMapper.class);
            }
        }
    }

    FileOutputFormat.setOutputPath(conf, new Path(args[0]));

    RunningJob rj = JobClient.runJob(conf);

    return rj.isSuccessful() ? 0 : 1;
}
From source file:org.archive.jbs.misc.PageRank.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("PageRank <output> <input>...");
        return 1;
    }

    JobConf conf = new JobConf(getConf(), PageRank.class);
    conf.setJobName("jbs.PageRank");

    // No need to set this since we use the MultipleInputs class
    // below, which allows us to specify a mapper for each input.
    // conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(GenericObject.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);

    // The input paths should be either NutchWAX segment directories
    // or Hadoop SequenceFiles containing JSON-encoded Documents
    for (int i = 1; i < args.length; i++) {
        Path p = new Path(args[i]);

        // Expand any file globs and then check each matching path
        FileStatus[] files = FileSystem.get(conf).globStatus(p);

        for (FileStatus file : files) {
            if (file.isDir()) {
                // If it's a directory, then check if it is a Nutch segment, otherwise treat as a SequenceFile.
                Path nwp = new Path(file.getPath(), "parse_data");
                if (p.getFileSystem(conf).exists(nwp)) {
                    LOG.info("Adding input path: " + nwp);
                    MultipleInputs.addInputPath(conf, nwp, SequenceFileInputFormat.class, Map.class);
                } else {
                    LOG.info("Adding input path: " + file.getPath());
                    MultipleInputs.addInputPath(conf, file.getPath(), SequenceFileInputFormat.class, Map.class);
                }
            } else {
                // Not a directory, skip it.
                LOG.warn("Not a directory, skip input: " + file.getPath());
            }
        }
    }

    FileOutputFormat.setOutputPath(conf, new Path(args[0]));

    RunningJob rj = JobClient.runJob(conf);

    return rj.isSuccessful() ? 0 : 1;
}
From source file:org.archive.jbs.Parse.java
License:Apache License
/**
 * Run the job.
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage();
        return 1;
    }

    FileSystem fs = FileSystem.get(getConf());

    // Create a job configuration
    JobConf job = new JobConf(getConf());

    // Job name uses output dir to help identify it to the operator.
    job.setJobName("jbs.Parse " + args[0]);

    // The inputs are a list of filenames, use the
    // FilenameInputFormat to pass them to the mappers.
    job.setInputFormat(FilenameInputFormat.class);

    // This is a map-only job, no reducers.
    job.setNumReduceTasks(0);

    // Use the Parse-specific output format.
    job.setOutputFormat(PerMapOutputFormat.class);

    // Use our ParseMapper, with output keys and values of type
    // Text.
    job.setMapperClass(ParseMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Configure the input and output paths, from the command-line.
    Path outputDir = new Path(args[0]);
    FileOutputFormat.setOutputPath(job, outputDir);

    boolean atLeastOneInput = false;
    for (int i = 1; i < args.length; i++) {
        FileSystem inputfs = FileSystem.get(new java.net.URI(args[i]), getConf());

        for (FileStatus status : inputfs.globStatus(new Path(args[i]))) {
            Path inputPath = status.getPath();
            Path outputPath = new Path(outputDir, inputPath.getName());
            if (fs.exists(outputPath)) {
                LOG.debug("Output path already exists: " + outputPath);
            } else {
                atLeastOneInput = true;
                LOG.info("Add input path: " + inputPath);
                FileInputFormat.addInputPath(job, inputPath);
            }
        }
    }

    if (!atLeastOneInput) {
        LOG.info("No input files to parse.");
        return 0;
    }

    // Run the job!
    RunningJob rj = JobClient.runJob(job);

    if (!rj.isSuccessful()) {
        LOG.error("FAILED: " + rj.getID());
        return 2;
    }

    return 0;
}
From source file:org.archive.nutchwax.Importer.java
License:Apache License
/**
 * Runs the import job with the given arguments.  This method
 * assumes that it is being run via the command-line; as such, it
 * emits error messages regarding invalid/missing arguments to the
 * system error stream.
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        return -1;
    }

    JobConf job = new NutchJob(getConf());

    // Check for "-e <exclusions>" option.
    int pos = 0;
    if (args[0].equals("-e")) {
        if (args.length < 2) {
            System.out.println("ERROR: Missing filename for option \"-e\"\n");
            usage();
            return -1;
        }

        job.set("nutchwax.urlfilter.wayback.exclusions", args[1]);
        pos = 2;
    }

    if (args.length - pos < 1) {
        System.out.println("ERROR: Missing manifest file.\n");
        usage();
        return -1;
    }

    Path manifestPath = new Path(args[pos++]);

    Path segmentPath;
    if (args.length - pos < 1) {
        segmentPath = new Path("segments", org.apache.nutch.crawl.Generator.generateSegmentName());
    } else {
        segmentPath = new Path(args[pos]);
    }

    try {
        job.setJobName("Importer " + manifestPath);
        job.set(Nutch.SEGMENT_NAME_KEY, segmentPath.getName());

        // job.setInputPath(manifestPath);
        FileInputFormat.addInputPath(job, manifestPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(Importer.class);

        // job.setOutputPath(segmentPath);
        FileOutputFormat.setOutputPath(job, segmentPath);
        job.setOutputFormat(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        RunningJob rj = JobClient.runJob(job);

        return rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        LOG.fatal("Importer: ", e);
        System.out.println("Fatal error: " + e);
        e.printStackTrace(System.out);
        return -1;
    }
}
From source file:org.archive.nutchwax.ImporterToHdfs.java
License:Apache License
/**
 * Runs the import job with the given arguments. This method assumes that it
 * is being run via the command-line; as such, it emits error messages
 * regarding invalid/missing arguments to the system error stream.
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        return -1;
    }

    JobConf job = new NutchJob(getConf());
    System.setProperty("fullPathExecution", "false");
    Path manifestPath = null;

    // Check for "-e <exclusions>" option & "-p <path_to_warc_files>" option.
    int pos = 0;
    for (String[] str : getOptsList(args)) {
        if (args.length < pos + 2) {
            System.out.println("ERROR: Missing filename for option \"" + str[0] + "\"\n");
            usage();
            return -1;
        }
        if (str[0].equals("-p")) {
            manifestPath = new Path(getManifestFile(str[1]));
            System.setProperty("fullPathExecution", "true");
        } else if (str[0].equals("-e")) {
            job.set("nutchwax.urlfilter.wayback.exclusions", str[1]);
        }
        pos = pos + 2;
    }

    if (manifestPath == null) {
        if (args.length - pos < 1) {
            System.out.println("ERROR: Missing manifest file.\n");
            usage();
            return -1;
        } else {
            manifestPath = new Path(args[pos++]);
        }
    }

    Path segmentPath;
    if (args.length - pos < 1) {
        segmentPath = new Path("segments", org.apache.nutch.crawl.Generator.generateSegmentName());
    } else {
        segmentPath = new Path(args[pos]);
    }

    try {
        job.setJobName("Importer_to_Hdfs " + manifestPath);
        job.set(Nutch.SEGMENT_NAME_KEY, segmentPath.getName());

        // job.setInputPath(manifestPath);
        FileInputFormat.addInputPath(job, manifestPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(ImporterToHdfs.class);

        // job.setOutputPath(segmentPath);
        FileOutputFormat.setOutputPath(job, segmentPath);
        job.setOutputFormat(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        RunningJob rj = JobClient.runJob(job);

        return rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        LOG.fatal("Importer_to_Hdfs: ", e);
        System.out.println("Fatal error: " + e);
        e.printStackTrace(System.out);
        return -1;
    }
}
From source file:org.cloudata.examples.upload.partitionjob.PartitionJob.java
License:Apache License
public boolean runJob(String inputPath, String tableName, int numOfTablets) throws IOException {
    JobConf jobConf = new JobConf(PartitionJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    FileSystem fs = FileSystem.get(jobConf);
    // Remove any leftover log-count file from a previous run.
    FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);

    jobConf.setJobName("PartitionJob_" + tableName + "(" + new Date() + ")");
    jobConf.set("cloudata.numOfTablets", String.valueOf(numOfTablets));
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    String clientOpt = jobConf.get("mapred.child.java.opts");
    if (clientOpt == null) {
        clientOpt = "";
    }
    jobConf.set("mapred.child.java.opts", clientOpt + " -Duser.name=" + System.getProperty("user.name"));

    // <Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(PartitionMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // </Map>

    // <Reduce>
    Path tempOutputPath = new Path("temp/partitionJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(PartitionReducer.class);
    // Use a single reduce task
    jobConf.setNumReduceTasks(1);
    // </Reduce>

    try {
        RunningJob job = JobClient.runJob(jobConf);
        return job.isSuccessful();
    } finally {
        FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.hxx.hadoop.GeneratorHbase.java
License:Apache License
public Path generateAll(int tableNum, Path segments, long topN, int reduceCnt, boolean filter, boolean norm,
        boolean force) {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Generator: from table=" + tableNum + " starting at " + sdf.format(start));
    LOG.info("Generator: filtering=" + filter + "; Generator: normalizing=" + norm);

    Path segment = new Path(segments, Generator.generateSegmentName());
    String table = "crawldb" + tableNum;
    long cnt = 0;
    try {
        RunningJob r = generateJob(table, segment, topN, reduceCnt, filter, norm, force);
        Counter counter = r.getCounters().findCounter("Generator", "records");
        cnt = counter.getValue();

        if (r.isSuccessful()) {
            // LOG.info(Nutch.GEN_JOB_SUCCESS + "=1;");
        } else {
            // LOG.info(Nutch.GEN_JOB_FAIL + "=1;");
        }
        LOG.info("Generator: " + segment + " records: " + cnt + " current table=" + table + " timeused="
                + (System.currentTimeMillis() - start) / 1000 + "s");
    } catch (Throwable e) {
        removePath(segment);
        LOG.error("generateAll:", e);
    }

    int less = getConf().getInt("generator.less", 10000);
    if (cnt == 0) {
        removePath(segment);
        return null;
    } else if (cnt <= less) {
        // too few records (cnt <= less, default 10000)
        removePath(segment);
        return null;
    }

    long end = System.currentTimeMillis();
    // LOG.info(Nutch.GEN_TIME + "=" + (end - start) + ";");

    // have records
    GenerateInfos.topn = topN;
    GenerateInfos.hostn = getConf().getInt(Generator.GENERATOR_MAX_COUNT, -1);
    GenerateInfo genInfo = GenerateInfos.getGenerateInfo();
    genInfo.start = start;
    genInfo.generate = cnt;
    genInfo.table = table;
    genInfo.end = end;
    genInfo.endTime = sdf.format(genInfo.end);
    LOG.info(GenerateInfos.printString());

    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    return segment;
}
From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*
            final AtomicInteger taskCount = new AtomicInteger(0);
            final AtomicInteger successCount = new AtomicInteger(0);
            final AtomicInteger failedCount = new AtomicInteger(0);
            */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));

            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                // taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") }
                                        : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                // successCount.incrementAndGet();
                                // taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            // logError(ignored.getMessage());
                            // failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /*
            if (blocking) {
                while (taskCount.get() > 0 && !parentJob.isStopped()) {
                    Thread.sleep(1000);
                }

                if (!parentJob.isStopped()) {
                    result.setResult(successCount.get() > 0);
                    result.setNrErrors((successCount.get() > 0) ? 0 : 1);
                } else {
                    // we can't really know at this stage if
                    // the hadoop job will finish successfully
                    // because we have to stop now
                    result.setResult(true); // look on the bright side of life :-)...
                    result.setNrErrors(0);
                }
            } else {
            */

            // non-blocking - just set success equal to no failures arising
            // from invocation
            // result.setResult(failedCount.get() == 0);
            // result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }

            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            // FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);

            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting num"
                        + "map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting num"
                        + "reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS + "'. Setting "
                        + "logging interval to 60");
            }

            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
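The Pentaho entry above wraps the submit-and-poll pattern in a lot of PDI-specific configuration. Stripped down, the RunningJob usage is roughly the sketch below; SubmitAndPoll and submitAndWait are hypothetical names used only for illustration and are not part of the Pentaho code.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndPoll {
    /** Submits the job and polls until it finishes; returns the final success flag. */
    public static boolean submitAndWait(JobConf conf) throws IOException, InterruptedException {
        JobClient jobClient = new JobClient(conf);
        // submitJob() returns immediately, unlike the blocking JobClient.runJob().
        RunningJob runningJob = jobClient.submitJob(conf);

        // Poll until the job reaches a terminal state (succeeded, failed, or killed).
        while (!runningJob.isComplete()) {
            Thread.sleep(10 * 1000L);
        }

        // isSuccessful() is only meaningful once isComplete() returns true.
        return runningJob.isSuccessful();
    }
}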
From source file:org.pooledtimeseries.healthcheck.CheckCartesianProductSeqFile.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    long start = System.currentTimeMillis();

    JobConf conf = new JobConf("Cartesian Product");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: CheckCartesianProductSeqFile <input sequence file> <out>");
        System.exit(1);
    }

    // Configure the join type
    conf.setJarByClass(CheckCartesianProductSeqFile.class);

    conf.setMapperClass(CartesianMapper.class);
    conf.setReducerClass(CartesianReducer.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);

    TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }

    long finish = System.currentTimeMillis();

    System.out.println("Time in ms: " + (finish - start));

    System.exit(job.isSuccessful() ? 0 : 2);
}