Example usage for org.apache.hadoop.mapred RunningJob isSuccessful

Introduction

This page collects example usages of org.apache.hadoop.mapred.RunningJob.isSuccessful() drawn from open-source projects.

Prototype

public boolean isSuccessful() throws IOException;

Document

Check if the job completed successfully.
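
The examples below all follow the same basic pattern: run a job with the blocking JobClient.runJob(JobConf) call, then query isSuccessful() on the returned RunningJob to choose an exit code. Here is a minimal sketch of that pattern; it assumes an already-configured JobConf, and the helper name runAndCheck is hypothetical:

public int runAndCheck(JobConf conf) throws IOException {
    // runJob() blocks until the job completes (or fails).
    RunningJob job = JobClient.runJob(conf);

    // isSuccessful() is the usual way to turn the outcome into an exit code.
    if (!job.isSuccessful()) {
        System.err.println("Job failed: " + job.getID());
        return 1;
    }
    return 0;
}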

Usage

From source file:org.archive.jbs.Merge.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("jbs.Merge <output> <input>...");
        return 1;
    }

    JobConf conf = new JobConf(getConf(), Merge.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    // Choose the outputformat to either merge or index the records
    //
    // org.archive.jbs.lucene.LuceneOutputFormat
    //    - builds local Lucene index
    //
    // org.archive.jbs.solr.SolrOutputFormat
    //    - sends documents to remote Solr server
    //
    // org.apache.hadoop.mapred.MapFileOutputFormat
    //    - writes merged documents to Hadoop MapFile
    conf.setOutputFormat((Class) Class
            .forName(conf.get("jbs.outputformat.class", "org.apache.hadoop.mapred.MapFileOutputFormat")));

    // Set the Hadoop job name to incorporate the output format name.
    String formatName = conf.getOutputFormat().getClass().getName();
    conf.setJobName("jbs.Merge "
            + formatName.substring(formatName.lastIndexOf('.') != -1 ? (formatName.lastIndexOf('.') + 1) : 0));

    // Add the input paths as either NutchWAX segment directories or
    // text .dup files.
    for (int i = 1; i < args.length; i++) {
        Path p = new Path(args[i]);

        // Expand any file globs and then check each matching path
        FileStatus[] files = FileSystem.get(conf).globStatus(p);

        for (FileStatus file : files) {
            if (file.isDir()) {
                // If it's a directory, then check if it is a Nutch segment, otherwise treat as a SequenceFile.
                if (p.getFileSystem(conf).exists(new Path(file.getPath(), "parse_data"))) {
                    LOG.info("Input NutchWax: " + file.getPath());
                    MultipleInputs.addInputPath(conf, new Path(file.getPath(), "parse_data"),
                            SequenceFileInputFormat.class, NutchMapper.class);
                    MultipleInputs.addInputPath(conf, new Path(file.getPath(), "parse_text"),
                            SequenceFileInputFormat.class, NutchMapper.class);
                } else {
                    // Assume it's a SequenceFile of JSON-encoded Documents.
                    LOG.info("Input Document: " + file.getPath());
                    MultipleInputs.addInputPath(conf, file.getPath(), SequenceFileInputFormat.class,
                            DocumentMapper.class);
                }
            } else {
                // Not a directory, assume it's a text file, either CDX or property specifications.
                LOG.info("Input TextFile: " + file.getPath());
                MultipleInputs.addInputPath(conf, file.getPath(), TextInputFormat.class, TextMapper.class);
            }
        }
    }

    FileOutputFormat.setOutputPath(conf, new Path(args[0]));

    RunningJob rj = JobClient.runJob(conf);

    return rj.isSuccessful() ? 0 : 1;
}

From source file:org.archive.jbs.misc.PageRank.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("PageRank <output> <input>...");
        return 1;
    }

    JobConf conf = new JobConf(getConf(), PageRank.class);
    conf.setJobName("jbs.PageRank");

    // No need to set this since we use the MultipleInputs class
    // below, which allows us to specify a mapper for each input.
    // conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(GenericObject.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);

    // The input paths should be either NutchWAX segment directories
    // or Hadoop SequenceFiles containing JSON-encoded Documents
    for (int i = 1; i < args.length; i++) {
        Path p = new Path(args[i]);

        // Expand any file globs and then check each matching path
        FileStatus[] files = FileSystem.get(conf).globStatus(p);

        for (FileStatus file : files) {
            if (file.isDir()) {
                // If it's a directory, then check if it is a Nutch segment, otherwise treat as a SequenceFile.
                Path nwp = new Path(file.getPath(), "parse_data");
                if (p.getFileSystem(conf).exists(nwp)) {
                    LOG.info("Adding input path: " + nwp);
                    MultipleInputs.addInputPath(conf, nwp, SequenceFileInputFormat.class, Map.class);
                } else {
                    LOG.info("Adding input path: " + file.getPath());
                    MultipleInputs.addInputPath(conf, file.getPath(), SequenceFileInputFormat.class, Map.class);
                }
            } else {
                // Not a directory, skip it.
                LOG.warn("Not a directory, skip input: " + file.getPath());
            }
        }
    }

    FileOutputFormat.setOutputPath(conf, new Path(args[0]));

    RunningJob rj = JobClient.runJob(conf);

    return rj.isSuccessful() ? 0 : 1;
}

From source file:org.archive.jbs.Parse.java

License:Apache License

/**
 * Run the job.
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage();
        return 1;
    }

    FileSystem fs = FileSystem.get(getConf());

    // Create a job configuration
    JobConf job = new JobConf(getConf());

    // Job name uses output dir to help identify it to the operator.
    job.setJobName("jbs.Parse " + args[0]);

    // The inputs are a list of filenames, use the
    // FilenameInputFormat to pass them to the mappers.
    job.setInputFormat(FilenameInputFormat.class);

    // This is a map-only job, no reducers.
    job.setNumReduceTasks(0);

    // Use the Parse-specific output format.
    job.setOutputFormat(PerMapOutputFormat.class);

    // Use our ParseMapper, with output keys and values of type
    // Text.
    job.setMapperClass(ParseMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Configure the input and output paths, from the command-line.
    Path outputDir = new Path(args[0]);
    FileOutputFormat.setOutputPath(job, outputDir);

    boolean atLeastOneInput = false;
    for (int i = 1; i < args.length; i++) {
        FileSystem inputfs = FileSystem.get(new java.net.URI(args[i]), getConf());

        for (FileStatus status : inputfs.globStatus(new Path(args[i]))) {
            Path inputPath = status.getPath();
            Path outputPath = new Path(outputDir, inputPath.getName());
            if (fs.exists(outputPath)) {
                LOG.debug("Output path already exists: " + outputPath);
            } else {
                atLeastOneInput = true;
                LOG.info("Add input path: " + inputPath);
                FileInputFormat.addInputPath(job, inputPath);
            }
        }
    }

    if (!atLeastOneInput) {
        LOG.info("No input files to parse.");
        return 0;
    }

    // Run the job!
    RunningJob rj = JobClient.runJob(job);

    if (!rj.isSuccessful()) {
        LOG.error("FAILED: " + rj.getID());
        return 2;
    }

    return 0;
}

From source file:org.archive.nutchwax.Importer.java

License:Apache License

/**
 * Runs the import job with the given arguments.  This method
 * assumes that it is being run via the command-line; as such, it
 * emits error messages regarding invalid/missing arguments to the
 * system error stream.
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        return -1;
    }

    JobConf job = new NutchJob(getConf());

    // Check for "-e <exclusions>" option.
    int pos = 0;
    if (args[0].equals("-e")) {
        if (args.length < 2) {
            System.out.println("ERROR: Missing filename for option \"-e\"\n");
            usage();
            return -1;
        }

        job.set("nutchwax.urlfilter.wayback.exclusions", args[1]);

        pos = 2;
    }

    if (args.length - pos < 1) {
        System.out.println("ERROR: Missing manifest file.\n");
        usage();
        return -1;
    }

    Path manifestPath = new Path(args[pos++]);

    Path segmentPath;
    if (args.length - pos < 1) {
        segmentPath = new Path("segments", org.apache.nutch.crawl.Generator.generateSegmentName());
    } else {
        segmentPath = new Path(args[pos]);
    }

    try {
        job.setJobName("Importer " + manifestPath);
        job.set(Nutch.SEGMENT_NAME_KEY, segmentPath.getName());

        //job.setInputPath  ( manifestPath);
        FileInputFormat.addInputPath(job, manifestPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(Importer.class);

        //job.setOutputPath      ( segmentPath               );
        FileOutputFormat.setOutputPath(job, segmentPath);
        job.setOutputFormat(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        RunningJob rj = JobClient.runJob(job);

        return rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        LOG.fatal("Importer: ", e);
        System.out.println("Fatal error: " + e);
        e.printStackTrace(System.out);
        return -1;
    }
}

From source file:org.archive.nutchwax.ImporterToHdfs.java

License:Apache License

/**
 * Runs the import job with the given arguments. This method assumes that it
 * is being run via the command-line; as such, it emits error messages
 * regarding invalid/missing arguments to the system error stream.
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        return -1;
    }

    JobConf job = new NutchJob(getConf());
    System.setProperty("fullPathExecution", "false");
    Path manifestPath = null;

    // Check for "-e <exclusions>" option & "-p <path_to_warc_files>" option.
    int pos = 0;
    for (String[] str : getOptsList(args)) {
        if (args.length < pos + 2) {
            System.out.println("ERROR: Missing filename for option \"" + str[0] + "\"\n");
            usage();
            return -1;
        }

        if (str[0].equals("-p")) {
            manifestPath = new Path(getManifestFile(str[1]));
            System.setProperty("fullPathExecution", "true");
        } else if (str[0].equals("-e")) {
            job.set("nutchwax.urlfilter.wayback.exclusions", str[1]);
        }
        pos = pos + 2;
    }

    if (manifestPath == null) {
        if (args.length - pos < 1) {
            System.out.println("ERROR: Missing manifest file.\n");
            usage();
            return -1;
        } else {
            manifestPath = new Path(args[pos++]);
        }
    }

    Path segmentPath;
    if (args.length - pos < 1) {
        segmentPath = new Path("segments", org.apache.nutch.crawl.Generator.generateSegmentName());
    } else {
        segmentPath = new Path(args[pos]);
    }

    try {
        job.setJobName("Importer_to_Hdfs " + manifestPath);
        job.set(Nutch.SEGMENT_NAME_KEY, segmentPath.getName());

        // job.setInputPath ( manifestPath);
        FileInputFormat.addInputPath(job, manifestPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(ImporterToHdfs.class);

        // job.setOutputPath ( segmentPath );
        FileOutputFormat.setOutputPath(job, segmentPath);
        job.setOutputFormat(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        RunningJob rj = JobClient.runJob(job);

        return rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        LOG.fatal("Importer_to_Hdfs: ", e);
        System.out.println("Fatal error: " + e);
        e.printStackTrace(System.out);
        return -1;
    }
}

From source file:org.cloudata.examples.upload.partitionjob.PartitionJob.java

License:Apache License

public boolean runJob(String inputPath, String tableName, int numOfTablets) throws IOException {
    JobConf jobConf = new JobConf(PartitionJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    FileSystem fs = FileSystem.get(jobConf);
    // Delete any previous log-count output before starting.
    FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);

    jobConf.setJobName("PartitionJob_" + tableName + "(" + new Date() + ")");
    jobConf.set("cloudata.numOfTablets", String.valueOf(numOfTablets));
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    String clientOpt = jobConf.get("mapred.child.java.opts");
    if (clientOpt == null) {
        clientOpt = "";
    }
    jobConf.set("mapred.child.java.opts", clientOpt + " -Duser.name=" + System.getProperty("user.name"));

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(PartitionMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/partitionJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(PartitionReducer.class);
    // Use a single reduce task.
    jobConf.setNumReduceTasks(1);
    //</Reduce>

    try {
        RunningJob job = JobClient.runJob(jobConf);
        return job.isSuccessful();
    } finally {
        FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}

From source file:org.hxx.hadoop.GeneratorHbase.java

License:Apache License

public Path generateAll(int tableNum, Path segments, long topN, int reduceCnt, boolean filter, boolean norm,
        boolean force) {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Generator: from table=" + tableNum + " starting at " + sdf.format(start));
    LOG.info("Generator: filtering:=" + filter + "; Generator: normalizing=" + norm);

    Path segment = new Path(segments, Generator.generateSegmentName());
    String table = "crawldb" + tableNum;
    long cnt = 0;
    try {
        RunningJob r = generateJob(table, segment, topN, reduceCnt, filter, norm, force);
        Counter counter = r.getCounters().findCounter("Generator", "records");
        cnt = counter.getValue();
        if (r.isSuccessful()) {
            // LOG.info(Nutch.GEN_JOB_SUCCESS + "=1;");
        } else {
            // LOG.info(Nutch.GEN_JOB_FAIL + "=1;");
        }
        LOG.info("Generator: " + segment + " records: " + cnt + " current table=" + table + " timeused="
                + (System.currentTimeMillis() - start) / 1000 + "s");
    } catch (Throwable e) {
        removePath(segment);
        LOG.error("generateAll:", e);
    }

    int less = getConf().getInt("generator.less", 10000);
    if (cnt == 0) {
        removePath(segment);
        return null;
    } else if (cnt <= less) { // too few records (threshold defaults to 10000)
        removePath(segment);
        return null;
    }

    long end = System.currentTimeMillis();
    // LOG.info(Nutch.GEN_TIME + "=" + (end - start) + ";");
    // have records
    GenerateInfos.topn = topN;
    GenerateInfos.hostn = getConf().getInt(Generator.GENERATOR_MAX_COUNT, -1);
    GenerateInfo genInfo = GenerateInfos.getGenerateInfo();
    genInfo.start = start;
    genInfo.generate = cnt;
    genInfo.table = table;
    genInfo.end = end;
    genInfo.endTime = sdf.format(genInfo.end);
    LOG.info(GenerateInfos.printString());
    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));

    return segment;
}

From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java

License:Apache License

public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*      final AtomicInteger taskCount = new AtomicInteger(0);
                  final AtomicInteger successCount = new AtomicInteger(0);
                  final AtomicInteger failedCount = new AtomicInteger(0); */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));
            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                //                  taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") }
                                        : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                //                  successCount.incrementAndGet();
                                //                  taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            //                logError(ignored.getMessage());
                            //                failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /* if (blocking) {
              while (taskCount.get() > 0 && !parentJob.isStopped()) {
                Thread.sleep(1000);
              }
                    
              if (!parentJob.isStopped()) {
                result.setResult(successCount.get() > 0);
                result.setNrErrors((successCount.get() > 0) ? 0 : 1);
              } else {
                // we can't really know at this stage if 
                // the hadoop job will finish successfully 
                // because we have to stop now
                result.setResult(true); // look on the bright side of life :-)...
                result.setNrErrors(0);
              }
            } else { */
            // non-blocking - just set success equal to no failures arising
            // from invocation
            //          result.setResult(failedCount.get() == 0);
            //          result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }

            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            //FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);
            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting num"
                        + "map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting num"
                        + "reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS + "'. Setting "
                        + "logging interval to 60");
            }
            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }

        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

From source file:org.pooledtimeseries.healthcheck.CheckCartesianProductSeqFile.java

License:Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    long start = System.currentTimeMillis();
    JobConf conf = new JobConf("Cartesian Product");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: CheckCartesianProductSeqFile <input sequence file> <out>");
        System.exit(1);
    }

    // Configure the join type
    conf.setJarByClass(CheckCartesianProductSeqFile.class);

    conf.setMapperClass(CartesianMapper.class);
    conf.setReducerClass(CartesianReducer.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);

    TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }

    long finish = System.currentTimeMillis();

    System.out.println("Time in ms: " + (finish - start));

    System.exit(job.isSuccessful() ? 0 : 2);
}