Example usage for org.apache.hadoop.mapreduce Job setJobName

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#setJobName from open-source projects.

Prototype

public void setJobName(String name) throws IllegalStateException 

Document

Set the user-specified job name. The name can only be set while the job is still being defined; once the job has been submitted, the call fails with the IllegalStateException declared in the prototype above.
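
For orientation, here is a minimal, self-contained sketch of the usual call order. This is a hypothetical driver: the class name, job name, and input/output paths are placeholders, and the job runs with the default identity mapper and reducer.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // The name must be set before submission; calling setJobName on a
        // submitted job throws IllegalStateException.
        job.setJobName("set-job-name-example");
        job.setJarByClass(SetJobNameExample.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples below follow the same pattern: construct the Job, name it, wire up mapper/reducer and input/output, then submit.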

Usage

From source file:com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir,
        Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    //setup job conf
    job.setJobName(jobName);
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        fs.delete(tmpDir, true);
        //      throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
        //          + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    //  try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                LongWritable.class, CompressionType.NONE);
        try {
            writer.append(offset, size);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    job.submit();
    //      final double duration = (System.currentTimeMillis() - startTime)/1000.0;
    //      System.out.println("Job Finished in " + duration + " seconds");
    return job.getJobID();

    //    } finally {
    //      fs.delete(tmpDir, true);
    //    }
}

From source file:com.shmsoft.dmass.main.MRFreeEedProcess.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // inventory dir holds all package (zip) files resulting from stage
    String projectFileName = args[0];
    String outputPath = args[1];
    logger.info("Running Hadoop job");
    logger.info("Input project file = " + projectFileName);
    logger.info("Output path = " + outputPath);

    // Hadoop configuration class
    Configuration configuration = getConf();
    // No speculative execution! Do not process the same file twice
    configuration.set("mapred.reduce.tasks.speculative.execution", "false");
    // TODO even in local mode, the first argument should not be the inventory
    // but write a complete project file instead
    Project project = Project.getProject();
    if (project == null || project.isEmpty()) {
        // configure Hadoop input files
        System.out.println("Reading project file " + projectFileName);
        project = new Project().loadFromFile(new File(projectFileName));
        Project.setProject(project);
    }
    project.setProperty(ParameterProcessing.OUTPUT_DIR_HADOOP, outputPath);
    // send complete project information to all mappers and reducers
    configuration.set(ParameterProcessing.PROJECT, project.toString());

    Settings.load();
    configuration.set(ParameterProcessing.SETTINGS_STR, Settings.getSettings().toString());
    configuration.set(ParameterProcessing.METADATA_FILE,
            Files.toString(new File(ColumnMetadata.metadataNamesFile), Charset.defaultCharset()));
    Job job = new Job(configuration);
    job.setJarByClass(MRFreeEedProcess.class);
    job.setJobName("MRFreeEedProcess");

    // Hadoop processes key-value pairs
    job.setOutputKeyClass(MD5Hash.class);
    job.setOutputValueClass(MapWritable.class);

    // set map and reduce classes
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // Hadoop TextInputFormat class
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //        String delim = "\u0001";
    //        configuration.set("mapred.textoutputformat.separator", delim);
    //        configuration.set("mapreduce.output.textoutputformat.separator", delim);

    logger.debug("project.isEnvHadoop() = {} ", project.isEnvHadoop());
    String inputPath = projectFileName;
    if (project.isEnvHadoop() || Settings.getSettings().isHadoopDebug()) {
        inputPath = formInputPath(project);
    }

    logger.debug("Ready to run, inputPath = {}, outputPath = {}", inputPath, outputPath);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    SHMcloudLogging.init(false);

    if (Settings.getSettings().isHadoopDebug()) {
        if (new File(outputPath).exists()) {
            Util.deleteDirectory(new File(outputPath));
        }
    }

    SolrIndex.getInstance().init();

    boolean success = job.waitForCompletion(true);
    if (project.isEnvHadoop() && project.isFsS3()) {
        transferResultsToS3(outputPath);
    }

    SolrIndex.getInstance().destroy();

    return success ? 0 : 1;
}

From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContextMRTest.java

License:Apache License

@Test
public void testWordCount() throws Exception {
    Path input = new Path("/user/test/keywords_data");
    Path output = new Path("/user/test/word_count");

    Job job = new Job(configuration);

    job.setJobName("Word Count Test");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(SumReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    assertTrue("All files from /data classpath directory should have been copied into HDFS",
            miniMRClusterContext.getFileSystem().exists(input));

    job.waitForCompletion(true);

    assertTrue("Output file should have been created", miniMRClusterContext.getFileSystem().exists(output));

    final LinkedList<String> expectedLines = new LinkedList<String>();
    expectedLines.add("goodbye\t1");
    expectedLines.add("hello\t1");
    expectedLines.add("world\t2");

    miniMRClusterContext.processData(output, new Function<String, Void>() {
        @Override
        public Void apply(String line) {
            assertEquals(expectedLines.pop(), line);
            return null;
        }
    });
    assertEquals(0, expectedLines.size());
}

From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("numUsers", null, "number of users", true);
    addOption("numItems", null, "number of items", true);
    addOption("blockSize", null, "dfs block size.", false);
    //addOption("runIterations", null, "true or false for iterations", true);

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
    numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
    lambda = Double.parseDouble(parsedArgs.get("--lambda"));
    alpha = Double.parseDouble(parsedArgs.get("--alpha"));
    implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
    numUsers = Integer.parseInt(parsedArgs.get("--numUsers"));
    numItems = Integer.parseInt(parsedArgs.get("--numItems"));
    dfsBlockSize = getOption("blockSize") == null ? 64 * 1024 * 1024 : Long.parseLong(getOption("blockSize"));
    /*
     * compute the factorization A = U M'
     *
     * where A (users x items) is the matrix of known ratings,
     *       U (users x features) is the representation of users in the feature space,
     *       M (items x features) is the representation of items in the feature space
     */

    /* create A' */
    Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class,
            ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    itemRatings.setCombinerClass(VectorSumReducer.class);
    itemRatings.waitForCompletion(true);
    //numItems = 
    //    (int) itemRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
    log.info("Number of Items\t{}", numItems);

    /* create A */
    Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
            IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
            VectorWritable.class);
    userRatings.setCombinerClass(MergeVectorsCombiner.class);
    userRatings.waitForCompletion(true);
    //numUsers = 
    //    (int) userRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
    log.info("Number of Users\t{}", numUsers);

    /* count item per user */
    Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnts"),
            SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class, IntWritable.class,
            UserItemCntsReducer.class, IntWritable.class, IntWritable.class, SequenceFileOutputFormat.class);
    userItemCntsJob.setJobName("user ratings count");
    userItemCntsJob.setCombinerClass(UserItemCntsReducer.class);
    userItemCntsJob.waitForCompletion(true);

    //TODO this could be fiddled into one of the upper jobs
    Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
            AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
            IntWritable.class, VectorWritable.class);
    averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
    averageItemRatings.waitForCompletion(true);

    Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf());

    /* create an initial M */
    initializeM(averageRatings);

    for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {
        DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1),
                getTempPath("Mtemp" + String.valueOf(currentIteration - 1)), numItems, numFeatures);
        curM.setConf(new Configuration());
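        // form Y'Y (with Y = M), which the solver below consumes when recomputing U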
        DistributedRowMatrix YtransposeY = curM.times(curM);

        // broadcast M, read A row-wise, recompute U row-wise //
        log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                YtransposeY.getRowPath(), numItems);

        DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration),
                getTempPath("Utmp" + String.valueOf(currentIteration)), numUsers, numFeatures);
        curU.setConf(new Configuration());
        DistributedRowMatrix XtransposeX = curU.times(curU);

        // set up index of U //
        CreateMapFileFromSeq.createMapFile(pathToU(currentIteration));

        // broadcast U, read A' row-wise, recompute M row-wise //
        log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
        runDistributedImplicitSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                XtransposeX.getRowPath(), numUsers);
    }
    return 0;
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("indexSizes", null, "index sizes Path", true);
    addOption("startIteration", null, "start iteration number", String.valueOf(0));
    addOption("oldM", null, "old M matrix Path.", null);
    addOption("largeUserFeatures", null, "true if user x feature matrix is too large for memory",
            String.valueOf(true));
    addOption("rmseCurve", null, "true if want to extract rmse curve", String.valueOf(true));
    addOption("cleanUp", null, "true if want to clean up temporary matrix", String.valueOf(true));
    addOption("useTransform", null, "true if using logarithm as transform", String.valueOf(true));
    addOption("rateIndex", null, "0 based index for rate column in input file.", String.valueOf(2));
    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    try {
        /* step 0: fetch dimensions of the training set matrix. */
        Map<String, String> indexSizesTmp = ALSMatrixUtil.fetchTextFiles(new Path(getOption("indexSizes")),
                DELIMETER, Arrays.asList(0), Arrays.asList(1));

        numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
        numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
        lambda = Double.parseDouble(parsedArgs.get("--lambda"));
        alpha = Double.parseDouble(parsedArgs.get("--alpha"));
        implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
        numUsers = Integer.parseInt(indexSizesTmp.get("0"));
        numItems = Integer.parseInt(indexSizesTmp.get("1"));

        numTaskTrackers = HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks;
        startIteration = Integer.parseInt(parsedArgs.get("--startIteration"));
        largeUserFeatures = Boolean.parseBoolean(getOption("largeUserFeatures"));
        useRMSECurve = Boolean.parseBoolean(getOption("rmseCurve"));
        cleanUp = Boolean.parseBoolean(getOption("cleanUp"));
        useTransform = Boolean.parseBoolean(getOption("useTransform"));
        rateIndex = Integer.parseInt(getOption("rateIndex"));
        FileSystem fs = FileSystem.get(getConf());
        if (!fs.exists(pathToTransformed())) {
            if (useTransform) {
                // transform price into rating
                Job transformJob = prepareJob(getInputPath(), pathToTransformed(), TextInputFormat.class,
                        TransformColumnValueMapper.class, NullWritable.class, Text.class,
                        TextOutputFormat.class);
                transformJob.waitForCompletion(true);
            } else {

                FileUtil.copy(FileSystem.get(getConf()), getInputPath(), FileSystem.get(getConf()),
                        pathToTransformed(), false, getConf());
            }
        }
        /*
        if (getOption("oldM") != null) {
          runOnetimeSolver(pathToTransformed(), getOutputPath("U"), new Path(getOption("oldM")));
          return 0;
        }
        */
        /*
         * compute the factorization A = U M'
         *
         * where A (users x items) is the matrix of known ratings,
         *       U (users x features) is the representation of users in the feature space,
         *       M (items x features) is the representation of items in the feature space
         */
        if (startIteration == 0) {
            if (!fs.exists(pathToItemRatings())) {
                // create A' 
                Job itemRatings = prepareJob(pathToTransformed(), pathToItemRatings(), TextInputFormat.class,
                        ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class,
                        VectorSumReducer.class, IntWritable.class, VectorWritable.class,
                        SequenceFileOutputFormat.class);
                itemRatings.setCombinerClass(VectorSumReducer.class);
                long matrixSizeExp = (long) (8L * numUsers * numFeatures * SAFE_MARGIN);
                long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT
                        / (long) HadoopClusterUtil.MAP_TASKS_PER_NODE;
                int numTaskPerDataNode = Math.max(1,
                        (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (double) matrixSizeExp));
                //log.info("matrix Size: " + matrixSizeExp + ", memorhThreshold: " + memoryThreshold + ", numTaskPerDataNode: " + numTaskPerDataNode);
                if (matrixSizeExp > memoryThreshold) {
                    //log.info("A: {}", numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                    int numReducer = Math.min(
                            numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()),
                            HadoopClusterUtil.getMaxMapTasks(getConf()));
                    //log.info("Number Of Reducer: " + numReducer);
                    itemRatings.setNumReduceTasks(numReducer);
                }

                itemRatings.waitForCompletion(true);
            }

            if (!fs.exists(pathToUserRatings())) {
                Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
                        IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
                        VectorWritable.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                userRatings.setCombinerClass(MergeVectorsCombiner.class);
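                // note: this second setNumReduceTasks call overrides the value set two lines above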
                userRatings.setNumReduceTasks(HadoopClusterUtil.getMaxMapTasks(getConf()));
                userRatings.waitForCompletion(true);
            }
            if (!fs.exists(getOutputPath("userItemCnt"))) {
                // count item per user
                Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnt"),
                        SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class,
                        IntWritable.class, SequenceFileOutputFormat.class);
                userItemCntsJob.setJobName("user ratings count");
                userItemCntsJob.waitForCompletion(true);
            }

            if (!fs.exists(getTempPath("averageRatings"))) {
                //TODO this could be fiddled into one of the upper jobs
                Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
                        AverageRatingMapper.class, IntWritable.class, VectorWritable.class,
                        MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
                averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
                averageItemRatings.waitForCompletion(true);
            }
            if (!fs.exists(new Path(pathToM(-1), "part-m-00000"))) {
                Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf());

                /** create an initial M */
                initializeM(averageRatings);
            }
        }

        for (int currentIteration = startIteration; currentIteration < numIterations; currentIteration++) {
            DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1),
                    getTempPath("Mtemp/tmp-" + String.valueOf(currentIteration - 1) + "/M"), numItems,
                    numFeatures);
            curM.setConf(getConf());
            DistributedRowMatrix YtransposeY = curM.times(curM);
            /** broadcast M, read A row-wise, recompute U row-wise */
            log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                    YtransposeY.getRowPath(), numItems, false);

            DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration),
                    getTempPath("Utmp/tmp-" + String.valueOf(currentIteration) + "/U"), numUsers, numFeatures);
            curU.setConf(getConf());
            DistributedRowMatrix XtransposeX = curU.times(curU);

            /** broadcast U, read A' row-wise, recompute M row-wise */
            log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                    XtransposeX.getRowPath(), numUsers, largeUserFeatures);

            /** calculate rmse on each updated matrix U, M and decide to further iteration */
            if (currentIteration > startIteration && useRMSECurve) {
                Pair<Integer, Double> UsquaredError = calculateMatrixDistanceSquared(
                        pathToU(currentIteration - 1), pathToU(currentIteration), currentIteration);
                Pair<Integer, Double> MsquaredError = calculateMatrixDistanceSquared(
                        pathToM(currentIteration - 1), pathToM(currentIteration), currentIteration);
                String currentRMSE = currentIteration + DELIMETER + UsquaredError.getFirst() + DELIMETER
                        + UsquaredError.getSecond() + DELIMETER + MsquaredError.getFirst() + DELIMETER
                        + MsquaredError.getSecond() + DefaultOptionCreator.NEWLINE;
                rmsePerIteration += currentRMSE;
                log.info("iteration {}: {}", currentIteration, currentRMSE);
            }
            if (currentIteration >= startIteration + 2 && cleanUp) {
                fs.deleteOnExit(pathToU(currentIteration - 2));
                fs.deleteOnExit(pathToM(currentIteration - 2));
            }
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (useRMSECurve) {
            HadoopClusterUtil.writeToHdfs(getConf(), getOutputPath("RMSE"), rmsePerIteration);
        }
    }
}

From source file:com.soteradefense.dga.louvain.mapreduce.CommunityCompression.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizer.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = null;
    try {
        int iteration = 0;
        if (!basePath.endsWith("/"))
            basePath = basePath + "/";
        String inputPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        String joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        String outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        Configuration mrConf = this.getConf();
        job = Job.getInstance(mrConf);

        for (Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
            mrConf.set(entry.getKey(), entry.getValue());
        }

        FileSystem fs = FileSystem.get(job.getConfiguration());
        boolean nextFileExists = fs.exists(new Path(joinPath));
        while (nextFileExists) {
            System.out.println("Processing " + inputPath + " and " + joinPath);
            job = Job.getInstance(mrConf);
            job.setJobName("Louvain Table Synthesizer " + iteration);

            job.setJarByClass(LouvainTableSynthesizer.class);

            job.setMapperClass(LouvainTableSynthesizerMapper.class);
            job.setReducerClass(LouvainTableSynthesizerReducer.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            //Reducer Output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            //Add both input folders
            Path in = new Path(inputPath);
            Path joinIn = new Path(joinPath);
            Path out = new Path(outputPath);
            FileInputFormat.addInputPath(job, in);
            FileInputFormat.addInputPath(job, joinIn);
            FileOutputFormat.setOutputPath(job, out);

            job.waitForCompletion(true);
            //Set the new temp input path
            inputPath = outputPath;
            iteration++;
            outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
            joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
            nextFileExists = fs.exists(new Path(joinPath));
        }

    } catch (IOException e) {
        e.printStackTrace();
        return -1;
    } catch (InterruptedException e) {
        e.printStackTrace();
        return -1;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        return -1;
    }
    return 0;
}

From source file:com.soteradefense.dga.LouvainRunner.java

License:Apache License

private int runMapreduceJob(String inputPath, String outputPath, DGAConfiguration conf) throws Exception {
    Configuration mrConf = new Configuration();
    for (Map.Entry<String, String> entry : conf.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(LouvainRunner.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

From source file:com.sreejith.loganalyzer.mapreduce.LogDriver.java

License:Apache License

public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJarByClass(LogDriver.class);
    job.setJobName("Log Analyzer");

    job.setMapperClass(LogMapper.class);
    job.setPartitionerClass(LogPartitioner.class);
    job.setCombinerClass(LogReducer.class);
    job.setReducerClass(LogReducer.class);

    job.setNumReduceTasks(2);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

}

From source file:com.sudarmuthu.hadoop.countwords.CountWords.java

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CountWords <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(CountWords.class);
    job.setJobName("Count Words");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CountWordsMapper.class);
    job.setReducerClass(CountWordsReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}