Example usage for org.apache.hadoop.mapreduce Job getInstance

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#getInstance.

Prototype

public static Job getInstance(Configuration conf, String jobName) throws IOException

Document

Creates a new Job with no particular Cluster and a given job name. A Cluster will be created from the conf parameter only when it is needed.
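
A minimal sketch of the call (the driver class and job name below are illustrative assumptions, not taken from any project on this page):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Create a Job bound to this configuration; a Cluster is created lazily only when needed.
        Job job = Job.getInstance(conf, "minimal example");
        job.setJarByClass(MinimalDriver.class);
        // ... set mapper, reducer, input and output paths here ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}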

Usage

From source file:com.kylinolap.job.hadoop.invertedindex.IIDistinctColumnsJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_TABLE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_INPUT_FORMAT);
        options.addOption(OPTION_INPUT_DELIM);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String tableName = getOptionValue(OPTION_TABLE_NAME).toUpperCase();
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        String inputFormat = getOptionValue(OPTION_INPUT_FORMAT);
        String inputDelim = getOptionValue(OPTION_INPUT_DELIM);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        // ----------------------------------------------------------------------------

        System.out.println("Starting: " + job.getJobName());

        setupMapInput(input, inputFormat, inputDelim);
        setupReduceOutput(output);

        // pass table and columns
        MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
        TableDesc table = metaMgr.getTableDesc(tableName);
        job.getConfiguration().set(BatchConstants.TABLE_NAME, tableName);
        job.getConfiguration().set(BatchConstants.TABLE_COLUMNS, getColumns(table));

        return waitForCompletion(job);

    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }

}

From source file:com.kylinolap.job.hadoop.invertedindex.InvertedIndexJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_INPUT_FORMAT);
        options.addOption(OPTION_INPUT_DELIM);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        String inputFormat = getOptionValue(OPTION_INPUT_FORMAT);
        String inputDelim = getOptionValue(OPTION_INPUT_DELIM);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        // ----------------------------------------------------------------------------

        System.out.println("Starting: " + job.getJobName());

        CubeInstance cube = getCube(cubeName);

        setupMapInput(input, inputFormat, inputDelim);
        setupReduceOutput(output, cube.getInvertedIndexDesc().getSharding());
        attachMetadata(cube);

        return waitForCompletion(job);

    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }

}

From source file:com.kylinolap.job.hadoop.invertedindex.RandomKeyDistributionJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_KEY_CLASS);
        options.addOption(OPTION_REGION_MB);

        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        job.setJarByClass(this.getClass());
        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);

        String keyClass = getOptionValue(OPTION_KEY_CLASS);
        Class<?> keyClz = Class.forName(keyClass);

        int regionMB = Integer.parseInt(getOptionValue(OPTION_REGION_MB));

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RandomKeyDistributionMapper.class);
        job.setMapOutputKeyClass(keyClz);
        job.setMapOutputValueClass(NullWritable.class);

        // Reducer - only one
        job.setReducerClass(RandomKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(keyClz);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        // total map input MB
        double totalMapInputMB = this.getTotalMapInputMB();
        int regionCount = Math.max(1, (int) (totalMapInputMB / regionMB));
        int mapSampleNumber = 1000;
        System.out.println("Total Map Input MB: " + totalMapInputMB);
        System.out.println("Region Count: " + regionCount);

        // set job configuration
        job.getConfiguration().set(BatchConstants.MAPPER_SAMPLE_NUMBER, String.valueOf(mapSampleNumber));
        job.getConfiguration().set(BatchConstants.REGION_NUMBER, String.valueOf(regionCount));

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        log.error(e.getLocalizedMessage(), e);
        return 2;
    }
}

From source file:com.leon.hadoop.loganalyse.WordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {
        // Use a custom log-format regex if a third command-line argument is supplied
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        // Map phase: set the input path and the mapper
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reduce phase: delete any existing output directory, then set the output path and the reducer
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce job execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        // Map phase: set the input path and the mapper
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reduce phase: delete any existing output directory, then set the output path and the reducer
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce job execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize common MapReduce parameters

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Use a custom mapper input-format regex if one was supplied
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce job execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize common MapReduce parameters
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        // read the job parameters
        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Use a custom mapper input-format regex if one was supplied
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        // run the job and wait for it to finish
        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce job execution failed.");
        }

        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.mapr.db.utils.ImportCSV_MR.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        System.out.println("MapR-DB JSON Tables - Import CSV" + "\nUsage:\n"
                + "\tParam 1: JSON Table Path (MapR-FS)\n" + "\tParam 2: Text File Path (Local-FS)\n"
                + "\tParam 3: Text File Delimiter (Local-FS)\n" + "\tParam 4: Schema File Path (Local-FS)\n");

        System.exit(-1);
    }

    outputTable = args[0].trim();
    inputDir = args[1].trim();
    delimiter = args[2].trim();
    schemaFile = args[3].trim();

    BasicConfigurator.configure();
    Logger.getRootLogger().setLevel(Level.ERROR);

    ImportCSV_MR imp = new ImportCSV_MR();

    imp.readSchema(schemaFile);
    imp.printSchema();

    Job job = Job.getInstance(conf, "ImportCSV_MR");
    job.setJarByClass(ImportCSV_MR.class);

    job.setMapperClass(MyMapper.class);

    conf = job.getConfiguration();
    conf.setStrings("io.serializations",
            new String[] { conf.get("io.serializations"), JSONDocumentSerialization.class.getName() });

    conf.set("countColumnsInSchema", String.valueOf(countColumnsInSchema));

    conf.set("delimiter", delimiter);

    conf.set("tablePath", outputTable);

    String[] valueTypes = valueTypesInSchema.toArray(new String[0]);
    conf.setStrings("valueTypesInSchema", valueTypes);

    String[] columnNames = columnNamesInSchema.toArray(new String[0]);
    conf.setStrings("columnNamesInSchema", columnNames);

    //Choose the appropriate input format class and add its input path
    FileInputFormat.addInputPath(job, new Path(inputDir));
    job.setInputFormatClass(TextInputFormat.class);

    //Mapper output record key and value class
    job.setMapOutputKeyClass(ByteBufWritableComparable.class);
    job.setMapOutputValueClass(DBDocumentImpl.class);

    //Choose the appropriate output format class and its target table
    conf.set("maprdb.mapred.outputtable", outputTable);
    job.setOutputFormatClass(TableOutputFormat.class);

    //Map-only job: skip the reduce phase entirely
    job.setNumReduceTasks(0);

    boolean isJobSuccessful = job.waitForCompletion(true);
    System.exit(isJobSuccessful ? 0 : 1);
    return 0;
}

From source file:com.marklogic.mapreduce.examples.BinaryReader.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    if (args.length < 2) {
        System.err.println("Usage: BinaryReader configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = Job.getInstance(conf, "binary reader");
    job.setJarByClass(BinaryReader.class);
    job.setInputFormatClass(DocumentInputFormat.class);
    job.setMapperClass(DocMapper.class);
    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(BinaryOutputFormat.class);
    job.setOutputKeyClass(DocumentURI.class);
    job.setOutputValueClass(BytesWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}