Usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
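All of the examples below follow the same basic pattern: build a Job, then read or modify its underlying Configuration through getConfiguration(). As a minimal sketch of that pattern (not taken from any of the source files below; the com.example.* property key is made up for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example job");

        // Read and write properties through the Job's own Configuration.
        Configuration jobConf = job.getConfiguration();
        jobConf.set("com.example.pattern", ".*ERROR.*"); // hypothetical application key
        jobConf.setInt("mapreduce.job.reduces", 4);

        System.out.println(jobConf.get("com.example.pattern"));
    }
}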
From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java
License:Apache License
public void addInputDirs(String input, Job job) throws IOException {
    for (String inp : StringSplitter.split(input, ",")) {
        inp = inp.trim();
        if (inp.endsWith("/*")) {
            inp = inp.substring(0, inp.length() - 2);
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(inp);
            FileStatus[] fileStatuses = fs.listStatus(path);
            boolean hasDir = false;
            for (FileStatus stat : fileStatuses) {
                if (stat.isDirectory()) {
                    hasDir = true;
                    addInputDirs(stat.getPath().toString(), job);
                }
            }
            if (fileStatuses.length > 0 && !hasDir) {
                addInputDirs(path.toString(), job);
            }
        } else {
            System.out.println("Add input " + inp);
            FileInputFormat.addInputPath(job, new Path(inp));
        }
    }
}
From source file:com.kylinolap.job.hadoop.cube.CuboidJob.java
License:Apache License
protected void setReduceTaskNum(Job job, KylinConfig config, String cubeName, int level)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    Configuration jobConf = job.getConfiguration();
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();

    CubeDesc cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor();

    double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
    double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();

    // total map input MB
    double totalMapInputMB = this.getTotalMapInputMB();

    // output / input ratio
    int preLevelCuboids, thisLevelCuboids;
    if (level == 0) { // base cuboid
        preLevelCuboids = thisLevelCuboids = 1;
    } else { // n-cuboid
        int[] allLevelCount = CuboidCLI.calculateAllLevelCount(cubeDesc);
        preLevelCuboids = allLevelCount[level - 1];
        thisLevelCuboids = allLevelCount[level];
    }

    // total reduce input MB
    double totalReduceInputMB = totalMapInputMB * thisLevelCuboids / preLevelCuboids;

    // number of reduce tasks
    int numReduceTasks = (int) Math.round(totalReduceInputMB / perReduceInputMB * reduceCountRatio);

    // adjust reducer number for cubes which have DISTINCT_COUNT measures, for better performance
    if (cubeDesc.hasHolisticCountDistinctMeasures()) {
        numReduceTasks = numReduceTasks * 4;
    }

    // at least 1 reducer
    numReduceTasks = Math.max(1, numReduceTasks);
    // no more than 5000 reducers by default
    numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

    jobConf.setInt(MAPRED_REDUCE_TASKS, numReduceTasks);

    System.out.println("Having total map input MB " + Math.round(totalMapInputMB));
    System.out.println("Having level " + level + ", pre-level cuboids " + preLevelCuboids
            + ", this level cuboids " + thisLevelCuboids);
    System.out.println("Having per reduce MB " + perReduceInputMB + ", reduce count ratio " + reduceCountRatio);
    System.out.println("Setting " + MAPRED_REDUCE_TASKS + "=" + numReduceTasks);
}
From source file:com.laizuozuoba.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    // System.setProperty("hadoop.home.dir", "D:\\hadoop-2.2.0");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    Job job2 = new Job(conf, "uv");
    job2.setJarByClass(WordCount.class);
    job2.setMapperClass(UVMapper.class);
    job2.setCombinerClass(UVReducer.class);
    job2.setReducerClass(UVReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job2, new Path("hdfs://10.18.106.67:9100/result2"));

    ControlledJob controlledJob = new ControlledJob(job.getConfiguration());
    ControlledJob controlledJob2 = new ControlledJob(job2.getConfiguration());
    controlledJob2.addDependingJob(controlledJob);

    JobControl jc = new JobControl("123");
    jc.addJob(controlledJob);
    jc.addJob(controlledJob2);

    Thread jcThread = new Thread(jc);
    jcThread.start();
    while (true) {
        if (jc.allFinished()) {
            System.out.println(jc.getSuccessfulJobList());
            jc.stop();
            break;
        }
        if (jc.getFailedJobList().size() > 0) {
            System.out.println(jc.getFailedJobList());
            jc.stop();
            break;
        }
        Thread.sleep(1000);
    }
    System.out.println("Finished!!!!!!!!!!!!!!!!!!!!!!!");
}
From source file:com.leon.hadoop.loganalyse.DistributedGrep.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);
    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.BlockHasher.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: BlockHasher <imageID> <image> <output>");
        return 2;
    }

    final String imageID = args[0];
    final String image = args[1];
    final String output = args[2];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, image, "BlockHasher", conf);
    job.setJarByClass(BlockHasher.class);
    job.setMapperClass(BlockHashMapper.class);
    // job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(MD5Hash.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job.waitForCompletion(true) ? 0 : 1;
}
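The comment in this example ("job ctor copies the Configuration we pass it, get the real one") points at an easy-to-miss detail: settings applied to the original Configuration after the Job has been constructed do not reach the job. A minimal sketch of that behaviour, under the same assumption the example makes (the com.example.timestamp key is made up):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ConfCopyDemo {
    public static void main(String[] args) throws Exception {
        Configuration original = new Configuration();
        Job job = Job.getInstance(original, "conf-copy-demo");

        // Setting the original after construction has no effect on the job...
        original.setLong("com.example.timestamp", System.currentTimeMillis());
        System.out.println(job.getConfiguration().get("com.example.timestamp")); // null

        // ...so set it on the copy the Job actually uses.
        job.getConfiguration().setLong("com.example.timestamp", System.currentTimeMillis());
        System.out.println(job.getConfiguration().get("com.example.timestamp")); // the timestamp
    }
}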
From source file:com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());

    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);
    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        // result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.FolderCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.spectrum.FsEntryHBaseInputFormat.java
License:Apache License
static public void setupJob(Job job, String deviceID) throws IOException, DecoderException {
    Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    /*
    if (deviceID != null && deviceID.length() > 0) {
        byte[] imgID = Hex.decodeHex(deviceID.toCharArray());
        FsEntryRowFilter keyFilt = new FsEntryRowFilter(imgID);
        RowFilter filt = new RowFilter(CompareFilter.CompareOp.EQUAL, keyFilt);
        scan.setFilter(filt);
    }
    */
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, HBaseTables.ENTRIES_TBL);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
    job.getConfiguration().set(SKMapper.ID_KEY, deviceID);
    LOG.info("hbase.zookeeper.quorum:" + job.getConfiguration().get("hbase.zookeeper.quorum"));
}
From source file:com.lightboxtechnologies.spectrum.PythonJob.java
License:Apache License
static void configPyTask(Job job, PyEngine py, String task, String script) throws Exception {
    final Configuration conf = job.getConfiguration();

    conf.set("com.lbt.scriptName." + task, script);
    conf.set("com.lbt.script." + task, Base64.encodeFromFile(script));

    Reader in = null;
    try {
        in = new BufferedReader(new FileReader(script));
        py.eval(in, script);
        in.close();
    } finally {
        IOUtils.closeQuietly(in);
    }
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java
License:Apache License
/**
 * Set input location and obtain input schema.
 */
@SuppressWarnings("unchecked")
@Override
public void setLocation(String location, Job job) throws IOException {
    if (inputAvroSchema != null) {
        return;
    }

    if (!UDFContext.getUDFContext().isFrontend()) {
        Properties udfProps = getUDFProperties();
        String mergedSchema = udfProps.getProperty(AVRO_MERGED_SCHEMA_PROPERTY);
        if (mergedSchema != null) {
            HashMap<URI, Map<Integer, Integer>> mergedSchemaMap =
                    (HashMap<URI, Map<Integer, Integer>>) ObjectSerializer.deserialize(mergedSchema);
            schemaToMergedSchemaMap = new HashMap<Path, Map<Integer, Integer>>();
            for (Entry<URI, Map<Integer, Integer>> entry : mergedSchemaMap.entrySet()) {
                schemaToMergedSchemaMap.put(new Path(entry.getKey()), entry.getValue());
            }
        }

        String schema = udfProps.getProperty(AVRO_INPUT_SCHEMA_PROPERTY);
        if (schema != null) {
            try {
                inputAvroSchema = new Schema.Parser().parse(schema);
                return;
            } catch (Exception e) {
                // Cases like testMultipleSchemas2 cause exception while deserializing
                // symbols. In that case, we get it again.
                LOG.warn("Exception while trying to deserialize schema in backend. "
                        + "Will construct again. schema= " + schema, e);
            }
        }
    }

    Configuration conf = job.getConfiguration();
    Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
    if (!paths.isEmpty()) {
        // Set top level directories in input format. Adding all files will
        // bloat configuration size
        FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
        // Scan all directories including sub directories for schema
        if (inputAvroSchema == null) {
            setInputAvroSchema(paths, conf);
        }
    } else {
        throw new IOException("Input path \'" + location + "\' is not found");
    }
}