Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs addNamedOutput

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.addNamedOutput.

Prototype

@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass,
        Class<?> keyClass, Class<?> valueClass) 

Document

Adds a named output for the job.
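
Registering a named output in the driver is only half of the pattern: records are actually routed to it from a mapper or reducer through a MultipleOutputs instance created in setup() and closed in cleanup(). The following is a minimal sketch of that pairing; the class name, the output name "errors", and the key/value types are illustrative and not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class ErrorAwareReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    private MultipleOutputs<Text, LongWritable> mos;

    // Driver side: declare the named output before submitting the job.
    public static void configureJob(Job job) {
        MultipleOutputs.addNamedOutput(job, "errors", TextOutputFormat.class,
                Text.class, LongWritable.class);
    }

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<Text, LongWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        if (key.toString().startsWith("ERROR")) {
            // Written to files named errors-r-xxxxx in the job output directory.
            mos.write("errors", key, new LongWritable(sum));
        } else {
            // Written to the regular part-r-xxxxx files.
            context.write(key, new LongWritable(sum));
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes and closes all named-output record writers.
        mos.close();
    }
}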

Usage

From source file:com.talis.mapreduce.dicenc.SecondDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf(), "second");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SecondMapper.class);
    job.setReducerClass(SecondReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    MultipleOutputs.addNamedOutput(job, "dict", TextOutputFormat.class, Text.class, Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
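
Note that the "dict" output declared here is only produced if SecondReducer (not shown on this page) writes to it, typically by creating a MultipleOutputs instance in setup(), calling write("dict", key, value) for dictionary records, and closing it in cleanup(), along the lines of the sketch in the Document section above.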

From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java

License:Apache License

/**
 * This method returns a Job instance out of a {@link MSCR} entity. It puts the Class of 
 * the {@link PlumeWorkflow} argument and the MSCR id in the hadoop configuration.
 * @param mscr The MSCR to convert
 * @param workFlow The workflow whose class will be instantiated by hadoop mappers/reducers
 * @param workFlowOutputPath The temporary output path stored in the job configuration
 * @param outputPath The output path of the MapRed job
 * @return A hadoop-executable MapRed Job
 * 
 * @throws IOException
 */
static Job getMapRed(final MSCR mscr, PlumeWorkflow workFlow, String workFlowOutputPath, String outputPath)
        throws IOException {

    Configuration conf = new Configuration();
    conf.set(WORKFLOW_NAME, workFlow.getClass().getName());
    conf.setInt(MSCR_ID, mscr.getId());
    conf.set(TEMP_OUTPUT_PATH, workFlowOutputPath);

    Job job = new Job(conf, "MSCR"); // TODO deprecation

    job.setMapOutputKeyClass(PlumeObject.class);
    job.setMapOutputValueClass(PlumeObject.class);

    job.setJarByClass(MapRedExecutor.class);

    /**
     * Define multiple inputs
     */
    for (PCollection<?> input : mscr.getInputs()) {
        if (!(input instanceof LazyCollection)) {
            throw new IllegalArgumentException("Can't create MapRed from MSCR whose inputs are not LazyTable");
        }
        LazyCollection<Text> l = (LazyCollection<Text>) input;
        if (!(l.isMaterialized() && l.getFile() != null)) {
            // Collections have plume ID only if they are intermediate results - TODO better naming for this
            if (l.getPlumeId().length() < 1) {
                throw new IllegalArgumentException(
                        "Can't create MapRed from MSCR inputs that are not materialized to a file");
            }
        }
        PCollectionType<?> rType = l.getType();
        Class<? extends InputFormat> format = SequenceFileInputFormat.class;
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            if (tType.valueType() instanceof StringType && tType.keyType() instanceof StringType) {
                format = KeyValueTextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        } else {
            if (rType.elementType() instanceof StringType) {
                format = TextInputFormat.class;
            }
            MultipleInputs.addInputPath(job, new Path(l.getFile()), format, MSCRMapper.class);
        }
    }
    /**
     * Define multiple outputs
     */
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
        PCollectionType<?> rType = ((LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output)
                .getType();
        if (rType instanceof PTableType) {
            PTableType<?, ?> tType = (PTableType<?, ?>) rType;
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (tType.keyType() instanceof StringType && tType.valueType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat,
                    getHadoopType(tType.keyType()), getHadoopType(tType.valueType()));
        } else {
            Class<? extends OutputFormat> outputFormat = SequenceFileOutputFormat.class;
            if (rType.elementType() instanceof StringType) {
                outputFormat = TextOutputFormat.class;
            }
            MultipleOutputs.addNamedOutput(job, entry.getValue() + "", outputFormat, NullWritable.class,
                    getHadoopType(rType.elementType()));
        }
    }
    /**
     * Define Reducer & Combiner
     */
    job.setCombinerClass(MSCRCombiner.class);
    job.setReducerClass(MSCRReducer.class);

    job.setNumReduceTasks(1);
    return job;
}

From source file:com.wipro.ats.bdre.dq.DQDriver.java

License:Apache License

@Override
public int run(String[] arg) throws Exception {
    String processId = arg[0];
    String sPath = arg[1];
    String destDir = arg[2];

    Properties props = new GetProperties().getProperties(processId, "dq");
    LOGGER.debug("props=" + props);
    Configuration conf = getConf();

    conf.set("dq.process.id", processId);
    Job job = Job.getInstance(conf);
    job.setJobName("Data Quality " + processId);
    job.setJarByClass(DQDriver.class);
    job.setMapperClass(DQMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    //Reducer is not required
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    Path inputFilePath = new Path(sPath);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, removeIfExistAndSetOutputPath(conf, destDir));
    MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);

    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Path outputDir = new Path(destDir);
    FileSystem srcFs = outputDir.getFileSystem(getConf());
    FileSystem destFs = outputDir.getFileSystem(getConf());

    //Valid Records
    Path goodFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
    //Input and quality filtered file should have same name (but different path)
    Path goodDestFile = new Path(destDir + "/" + inputFilePath.getName());
    if (srcFs.exists(goodFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, goodFilesSrcDir, destFs, goodDestFile, true, conf, "");
    }
    // Invalid Records
    Path badFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
    Path badDestFile = new Path(destDir + "/" + DQConstants.BAD_RECORDS_FILE);
    if (srcFs.exists(badFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, badFilesSrcDir, destFs, badDestFile, true, conf, "");
    }

    // Preparing report aggregation job
    Job fileReportAggregationJob = Job.getInstance(conf);
    fileReportAggregationJob.setJobName("File Report Computing " + processId);
    fileReportAggregationJob.setJarByClass(DQMain.class);

    fileReportAggregationJob.setMapperClass(DQFileReportMapper.class);
    fileReportAggregationJob.setMapOutputKeyClass(Text.class);
    fileReportAggregationJob.setMapOutputValueClass(IntWritable.class);

    fileReportAggregationJob.setReducerClass(DQFileReportReducer.class);
    fileReportAggregationJob.setOutputKeyClass(Text.class);
    fileReportAggregationJob.setOutputValueClass(Text.class);

    fileReportAggregationJob.setNumReduceTasks(1);

    Path fileReportDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
    Path fileReportOutputDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);

    FileInputFormat.addInputPath(fileReportAggregationJob, fileReportDir);
    FileOutputFormat.setOutputPath(fileReportAggregationJob, fileReportOutputDir);

    if (!fileReportAggregationJob.waitForCompletion(true)) {
        return 1;
    }

    // Merge Report Records MR stuffs
    Path reportsSrcDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    Path reportsDestFile = new Path(destDir + "/" + DQConstants.FILE_REPORT_FILE);
    FileUtil.copyMerge(srcFs, reportsSrcDir, destFs, reportsDestFile, true, conf, "");

    Path reportDestFile = new Path(outputDir.toString() + "/" + DQConstants.FILE_REPORT_FILE);
    //Read the report file from HDFS and report the percentage
    DQStats dqStats = getQualityStats(getConf(), reportDestFile);
    LOGGER.info("Percentage of good records :" + dqStats.getGoodPercent());
    props = new GetProperties().getProperties(processId, "dq");
    String strThreshold = props.getProperty("min.pass.threshold.percent");
    float threshold = Float.parseFloat(strThreshold);
    dqStats.setThreshold(threshold);
    //Update the result in metadata
    logResult(dqStats, processId, 0L);
    if (dqStats.getGoodPercent() < threshold) {
        LOGGER.error("DQ check did not pass");
        throw new DQValidationException(dqStats);
    }
    LOGGER.info(dqStats);
    FileChecksum hdfsChecksum = destFs.getFileChecksum(goodDestFile);
    String fileHash = hdfsChecksum == null ? "0" : hdfsChecksum.toString();
    //Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash(fileHash);
    registerFileInfo.setFileSize(destFs.getFileStatus(goodDestFile).getLen());
    registerFileInfo.setPath(goodDestFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);

    return 0;
}
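
The driver later merges files from DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR and DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR under the output directory, which suggests that DQMapper (not shown) writes to the named outputs with the baseOutputPath overload, mos.write(namedOutput, key, value, baseOutputPath), so that good records, bad records, and the per-file report each land in their own subdirectory of the job output path.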

From source file:com.yahoo.labs.yamall.hadoop.Test.java

License:Open Source License

/**
 * Run the map/reduce job
 */
public final int run(final String[] args) throws Exception {

    startLogger(Level.INFO);

    Configuration conf = getConf();
    conf.set("yamall.vw_model", args[2]);
    conf.setIfUnset("yamall.bit_precision", "18");
    conf.setIfUnset("yamall.parser", "vw");

    // Print to screen all the options
    TreeMap<String, String> map = new TreeMap<String, String>();
    for (Map.Entry<String, String> entry : conf) {
        map.put(entry.getKey(), entry.getValue());
    }
    for (Map.Entry<String, String> entry : map.entrySet()) {
        System.out.printf("%s=%s\n", entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(conf, "Yamall Test on MapReduce");
    job.setNumReduceTasks(1);
    job.setJarByClass(Test.class);
    job.setMapperClass(TestMapper.class);
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setReducerClass(TestReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(CompositeDoubleTextWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    MultipleOutputs.addNamedOutput(job, "out", TextOutputFormat.class, NullWritable.class, Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java

License:Apache License

/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);
    // Defines an additional text-based named output 'contingency' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

    // Defines additional text-based named outputs, one per association metric
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
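
The five named outputs registered here presumably give AssocReducer one channel per association measure: contingency tables go to "contingency", while llr, pmi, chi, and dice scores go to their respective outputs, so a single pass over the subgrams can emit all metrics side by side instead of running one job per measure.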

From source file:edu.umn.cs.sthadoop.hdfs.KNNJoin.java

License:Open Source License

static void knnJoinMapReduce(OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path[] inputPaths = params.getInputPaths();
    Path outputPath = params.getOutputPath();
    //final int k = params.getInt("k", 1);
    KNNJRecordReader.params = params;
    //System.out.println(params.getInputPaths().length);

    long t1 = System.currentTimeMillis();
    // phase 1
    params.set("type", "phase1");
    Job job = Job.getInstance(params, "KNNJoin Phase1");
    job.setJarByClass(KNNJoin.class);
    job.setInputFormatClass(KNNJInputFormat.class);
    KNNJInputFormat.setInputPaths(job, inputPaths[0], inputPaths[1]);
    job.setMapperClass(KNNJMap.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);
    MultipleOutputs.addNamedOutput(job, "phase2", TextOutputFormat3.class, Text.class, Text.class);

    // Submit the job
    if (job.waitForCompletion(true)) {
        LOG.info("[stat:job[0]");
    } else {
        LOG.info("[stat:job[1]");
        return;
    }
    long t2 = System.currentTimeMillis() - t1;
    t1 = System.currentTimeMillis();
    Counters counters = job.getCounters();
    long refSplits = counters.findCounter(KNNJMap.Stats.refSplits).getValue();
    long qSplits = counters.findCounter(KNNJMap.Stats.qSplits).getValue();
    long numRefRecs = counters.findCounter(KNNJMap.Stats.numRefRecs).getValue();
    long numQRecs = counters.findCounter(KNNJMap.Stats.numQRecs).getValue();
    long numP2Recs = counters.findCounter(KNNJMap.Stats.phase2Recs).getValue();
    String str = String.format(
            "stat:counters[refSplits=%s;qSplits=%s;numRefRecs=%s;" + "numQRecs=%s;numP2Recs=%s;t1=%s]",
            refSplits, qSplits, numRefRecs, numQRecs, numP2Recs, t2);
    LOG.info(str);
    // LOG.info("[stat:counter:refSplits="+refSplits+"]");
    // LOG.info("[stat:counter:qSplits="+qSplits+"]");
    // LOG.info("[stat:counter:numRefRecs="+numRefRecs+"]");
    // LOG.info("[stat:counter:numQRecs="+numQRecs+"]");
    // LOG.info("[stat:counter:numP2Recs="+numP2Recs+"]");
    /*
     * for (Iterator<String> iterator = counters.getGroupNames().iterator();
     * iterator.hasNext();) {
     * String str = (String) iterator.next();
     * LOG.info("[stat:counter="+str+"]");
     * }
     */
    // end of phase 1

    // phase 2
    /*params.set("type", "phase2");
    Job job2 = Job.getInstance(params, "KNNJoin Phase2");
    job2.setJarByClass(KNNJoin.class);
    job2.setMapperClass(TokenizerMapper.class);
    job2.setReducerClass(GroupingReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
            
    FileSystem outputFS = outputPath.getFileSystem(params);
    Path p2OutPath;
    do {
       p2OutPath = new Path(outputPath.getParent(), outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
    } while (outputFS.exists(p2OutPath));
    FileSystem p2OutPathFS = FileSystem.get(p2OutPath.toUri(), params);
            
    job2.setInputFormatClass(KNNJInputFormatPhase2.class);
    KNNJInputFormatPhase2.setInputPaths(job2, outputPath);
    job2.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job2, p2OutPath);
    MultipleOutputs.addNamedOutput(job2, "phase3", TextOutputFormat3.class, NullWritable.class, Text.class);
            
    // Submit the job
            
     * if (job2.waitForCompletion(true)) {
     * LOG.info("Job2 succeeded.");
     * } else {
     * LOG.info("Job2 failed.");
     * return;
     * }
             
    // end of phase 2
            
    t2 = System.currentTimeMillis() - t1;
    LOG.info("[stat:time:2=" + t2 + "]");
    t1 = System.currentTimeMillis();
            
    // phase 3
    params.set("type", "phase3");
    Job job3 = Job.getInstance(params, "KNNJoin Phase3");
    job3.setJarByClass(KNNJoin.class);
            
    job3.setMapperClass(KNNJMapPhase3.class);
    job3.setOutputKeyClass(NullWritable.class);
    job3.setOutputValueClass(Text.class);
    job3.setNumReduceTasks(0);
            
    Path p3OutPath;
    do {
       p3OutPath = new Path(outputPath.getParent(), outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
    } while (outputFS.exists(p3OutPath));
    FileSystem p3OutPathFS = FileSystem.get(p3OutPath.toUri(), params);
            
    job3.setInputFormatClass(KNNJInputFormatPhase3.class);
    KNNJInputFormatPhase3.setInputPaths(job3, p2OutPath, inputPaths[1]);
    job3.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job3, p3OutPath);
            
    // Submit the job
            
     * if (job3.waitForCompletion(true)) {
     * LOG.info("Job3 succeeded.");
     * } else {
     * LOG.info("Job3 failed.");
     * return;
     * }
             
    // end of phase 3
            
    // cleaning temporary dirs and files
    p2OutPathFS.delete(p2OutPath, true);
    p3OutPathFS.delete(p3OutPath, true);
            
    t2 = System.currentTimeMillis() - t1;
    LOG.info("[stat:time:3=" + t2 + "]");*/
}

From source file:edu.umn.cs.sthadoop.operations.STJoins.java

License:Open Source License

static void JoinMapReduce(OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path[] inputPaths = params.getInputPaths();
    Path outputPath = params.getOutputPath();
    //final int k = params.getInt("k", 1);
    HdfsRecordReader.params = params;
    //System.out.println(params.getInputPaths().length);

    long t1 = System.currentTimeMillis();
    // phase 1
    params.set("type", "phase1");
    Job job = Job.getInstance(params, "ST-Join Phase1");
    job.setJarByClass(STJoinsMapper.class);
    job.setInputFormatClass(HdfsInputFormat.class);
    HdfsInputFormat.setInputPaths(job, inputPaths[0], inputPaths[1]);
    job.setMapperClass(STJoinsMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);
    MultipleOutputs.addNamedOutput(job, "phase2", TextOutputFormat3.class, Text.class, Text.class);

    // Submit the job
    if (job.waitForCompletion(true)) {
        LOG.info("[stat:job[0]");
    } else {
        LOG.info("[stat:job[1]");
        return;
    }
    long t2 = System.currentTimeMillis() - t1;
    t1 = System.currentTimeMillis();
    Counters counters = job.getCounters();
    long refSplits = counters.findCounter(STJoinsMapper.Stats.refSplits).getValue();
    long qSplits = counters.findCounter(STJoinsMapper.Stats.qSplits).getValue();
    long numRefRecs = counters.findCounter(STJoinsMapper.Stats.numRefRecs).getValue();
    long numQRecs = counters.findCounter(STJoinsMapper.Stats.numQRecs).getValue();
    long numP2Recs = counters.findCounter(STJoinsMapper.Stats.phase2Recs).getValue();
    String str = String.format(
            "stat:counters[refSplits=%s;qSplits=%s;numRefRecs=%s;" + "numQRecs=%s;numP2Recs=%s;t1=%s]",
            refSplits, qSplits, numRefRecs, numQRecs, numP2Recs, t2);
    LOG.info(str);
    // LOG.info("[stat:counter:refSplits="+refSplits+"]");
    // LOG.info("[stat:counter:qSplits="+qSplits+"]");
    // LOG.info("[stat:counter:numRefRecs="+numRefRecs+"]");
    // LOG.info("[stat:counter:numQRecs="+numQRecs+"]");
    // LOG.info("[stat:counter:numP2Recs="+numP2Recs+"]");
    /*
     * for (Iterator<String> iterator = counters.getGroupNames().iterator();
     * iterator.hasNext();) {
     * String str = (String) iterator.next();
     * LOG.info("[stat:counter="+str+"]");
     * }
     */
    // end of phase 1

    // phase 2
    /*params.set("type", "phase2");
    Job job2 = Job.getInstance(params, "KNNJoin Phase2");
    job2.setJarByClass(KNNJoin.class);
    job2.setMapperClass(TokenizerMapper.class);
    job2.setReducerClass(GroupingReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
            
    FileSystem outputFS = outputPath.getFileSystem(params);
    Path p2OutPath;
    do {
       p2OutPath = new Path(outputPath.getParent(), outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
    } while (outputFS.exists(p2OutPath));
    FileSystem p2OutPathFS = FileSystem.get(p2OutPath.toUri(), params);
            
    job2.setInputFormatClass(KNNJInputFormatPhase2.class);
    KNNJInputFormatPhase2.setInputPaths(job2, outputPath);
    job2.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job2, p2OutPath);
    MultipleOutputs.addNamedOutput(job2, "phase3", TextOutputFormat3.class, NullWritable.class, Text.class);
            
    // Submit the job
            
     * if (job2.waitForCompletion(true)) {
     * LOG.info("Job2 succeeded.");
     * } else {
     * LOG.info("Job2 failed.");
     * return;
     * }
             
    // end of phase 2
            
    t2 = System.currentTimeMillis() - t1;
    LOG.info("[stat:time:2=" + t2 + "]");
    t1 = System.currentTimeMillis();
            
    // phase 3
    params.set("type", "phase3");
    Job job3 = Job.getInstance(params, "KNNJoin Phase3");
    job3.setJarByClass(KNNJoin.class);
            
    job3.setMapperClass( STJoinsMapperPhase3.class);
    job3.setOutputKeyClass(NullWritable.class);
    job3.setOutputValueClass(Text.class);
    job3.setNumReduceTasks(0);
            
    Path p3OutPath;
    do {
       p3OutPath = new Path(outputPath.getParent(), outputPath.getName() + ".knnj_" + (int) (Math.random() * 1000000));
    } while (outputFS.exists(p3OutPath));
    FileSystem p3OutPathFS = FileSystem.get(p3OutPath.toUri(), params);
            
    job3.setInputFormatClass(KNNJInputFormatPhase3.class);
    KNNJInputFormatPhase3.setInputPaths(job3, p2OutPath, inputPaths[1]);
    job3.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job3, p3OutPath);
            
    // Submit the job
            
     * if (job3.waitForCompletion(true)) {
     * LOG.info("Job3 succeeded.");
     * } else {
     * LOG.info("Job3 failed.");
     * return;
     * }
             
    // end of phase 3
            
    // cleaning temporary dirs and files
    p2OutPathFS.delete(p2OutPath, true);
    p3OutPathFS.delete(p3OutPath, true);
            
    t2 = System.currentTimeMillis() - t1;
    LOG.info("[stat:time:3=" + t2 + "]");*/
}

From source file:eu.edisonproject.classification.tfidf.mapreduce.CompetencesDistanceDriver.java

License:Apache License

@Override
public int run(String[] args) {
    try {
        Configuration conf = HBaseConfiguration.create();
        //additional output using TextOutputFormat.
        conf.set("file.names", args[3]);

        Job job = Job.getInstance(conf);
        //TableMapReduceUtil.addDependencyJars(job); 
        job.setJarByClass(CompetencesDistanceDriver.class);
        //This row must be changed
        job.setJobName("Words Group By Title Driver");

        Path inPath = new Path(args[0]);
        Path outPath = new Path(args[1]);

        Path competencesPath = new Path(args[2]);
        Path competencesPathHDFS = competencesPath;
        FileSystem fs = FileSystem.get(conf);

        if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
            competencesPathHDFS = new Path(competencesPath.getName());
            if (!fs.exists(competencesPathHDFS)) {
                fs.mkdirs(competencesPathHDFS);
                File[] stats = new File(competencesPath.toString()).listFiles();
                for (File stat : stats) {
                    Path filePath = new Path(stat.getAbsolutePath());
                    if (FilenameUtils.getExtension(filePath.getName()).endsWith("csv")) {
                        Path dest = new Path(competencesPathHDFS.toUri() + "/" + filePath.getName());
                        fs.copyFromLocalFile(filePath, dest);
                    }
                }
            }
        }
        job.addCacheFile(competencesPathHDFS.toUri());

        FileInputFormat.setInputPaths(job, inPath);

        FileOutputFormat.setOutputPath(job, outPath);
        fs.delete(outPath, true);

        job.setMapperClass(CompetencesDistanceMapper.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(CompetencesDistanceReducer.class);
        //            job.setOutputFormatClass(TableOutputFormat.class);
        //            job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "jobpostcompetence");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        String[] fileNames = args[3].split(",");
        for (String n : fileNames) {
            MultipleOutputs.addNamedOutput(job, n, TextOutputFormat.class, Text.class, Text.class);
        }

        return (job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | IllegalStateException | IllegalArgumentException | InterruptedException
            | ClassNotFoundException ex) {
        Logger.getLogger(CompetencesDistanceDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
    return 0;
}
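
A detail worth noting in this example: the named-output names come straight from the comma-separated list in args[3], and MultipleOutputs only accepts names made up of letters and digits (and rejects the reserved name "part"), so entries containing characters such as '.', '-' or '_' would make addNamedOutput throw an IllegalArgumentException.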

From source file:eu.scape_project.archiventory.Archiventory.java

License:Apache License

public static void startHadoopJob(Configuration conf) {
    try {
        Job job = new Job(conf, "archiventory");

        // local debugging (pseudo-distributed)
        //             job.getConfiguration().set("mapred.job.tracker", "local");
        //             job.getConfiguration().set("fs.default.name", "file:///");

        job.setJarByClass(Archiventory.class);

        job.setMapperClass(Archiventory.ContainerItemIdentificationMapper.class);
        job.setReducerClass(Archiventory.ContainerItemIdentificationReducer.class);

        job.setInputFormatClass(TextInputFormat.class);

        // tabular output of identification results
        MultipleOutputs.addNamedOutput(job, "idtab", TextOutputFormat.class, Text.class, Text.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ObjectWritable.class);

        TextInputFormat.addInputPath(job, new Path(config.getDirStr()));
        String outpath = "output/" + System.currentTimeMillis();
        FileOutputFormat.setOutputPath(job, new Path(outpath));
        job.waitForCompletion(true);
        System.out.print(outpath);
        System.exit(0);
    } catch (Exception e) {
        logger.error("I/O error", e);
    }
}

From source file:eu.scape_project.spacip.Spacip.java

License:Apache License

/**
 * Start Hadoop job
 *
 * @param conf Hadoop job configuration
 */
public static void startHadoopJob(Configuration conf) {
    try {
        Job job = new Job(conf, "spacip_" + conf.getInt("num_items_per_task", 0));

        // local debugging (pseudo-distributed)
        //             job.getConfiguration().set("mapred.job.tracker", "local");
        //             job.getConfiguration().set("fs.default.name", "file:///");

        job.setJarByClass(Spacip.class);

        job.setMapperClass(Spacip.ContainerProcessingMapper.class);
        // No reducer needed
        job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);

        MultipleOutputs.addNamedOutput(job, "keyfilmapping", TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(job, "tomarinput", TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(job, "error", TextOutputFormat.class, Text.class, Text.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ObjectWritable.class);

        TextInputFormat.addInputPath(job, new Path(config.getDirStr()));
        String outpath = StringUtils.normdir(conf.get("joboutput_hdfs_path", "spacip_joboutput"))
                + System.currentTimeMillis();
        FileOutputFormat.setOutputPath(job, new Path(outpath));
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        job.waitForCompletion(true);
        // print output path (taverna integration)
        System.out.print(outpath);
        System.exit(0);
    } catch (Exception e) {
        logger.error("I/O error", e);
    }
}
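
Unlike the previous example, this one also calls LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class), so the default output is only materialized when something is actually written to it; presumably the mapper routes most or all of its records to the keyfilmapping, tomarinput, and error named outputs, and the lazy wrapper keeps empty part-m-* files from cluttering the job output directory.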