Example usage for org.apache.hadoop.mapred JobConf getInt

List of usage examples for org.apache.hadoop.mapred JobConf getInt

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int.
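
Before the real-world examples under Usage, here is a minimal, self-contained sketch of the call. It is not drawn from any of the source files listed below; the property name "example.buffer.size" and its values are invented purely for illustration. getInt returns the configured value as an int, or the supplied default when the property is not set.

import org.apache.hadoop.mapred.JobConf;

public class GetIntSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Hypothetical property, set here only so the lookup below finds a value.
        conf.setInt("example.buffer.size", 4096);

        // Returns 4096 here; if "example.buffer.size" were not set at all,
        // the default passed as the second argument (1024) would be returned.
        int bufferSize = conf.getInt("example.buffer.size", 1024);
        System.out.println("buffer size = " + bufferSize);
    }
}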

Usage

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat.java

License:Open Source License

@Override
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (schema == null) {
        SchemaLoader loader = new SchemaLoader(job);
        this.schema = loader.load(job.get(SCHEMA_LITERAL), job.get(SCHEMA_URL), job.get(SCHEMA_TYPE_NAME));
        this.converter = new JsonConverter(schema);
        this.readKey = job.getBoolean(READ_KEY, true);
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.CONF_OUTPUT_CODEC,
                org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
        CodecFactory codec = codecName.equals(DataFileConstants.DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(codec);
    }
    writer.setSyncInterval(
            job.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY, DataFileConstants.DEFAULT_SYNC_INTERVAL));

    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroAsJSONRecordWriter(writer, converter, readKey);
}

From source file:com.dataartisans.flink.cascading.runtime.util.FlinkFlowProcess.java

License:Apache License

@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {

    if (trap instanceof Hfs) {

        JobConf jobConf = new JobConf(this.getConfigCopy());

        int stepNum = jobConf.getInt("cascading.flow.step.num", 0);
        int nodeNum = jobConf.getInt("cascading.flow.node.num", 0);

        String partname = String.format("-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum());
        jobConf.set("cascading.tapcollector.partname", "%s%spart" + partname);

        String value = String.format("attempt_%012d_0000_m_%06d_0", (int) Math.rint(System.currentTimeMillis()),
                this.getCurrentSliceNum());
        jobConf.set("mapred.task.id", value);
        jobConf.set("mapreduce.task.id", value);

        return trap.openForWrite(new FlinkFlowProcess(jobConf), null);
    } else {
        throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
    }
}

From source file:com.digitalpebble.behemoth.solr.LucidWorksWriter.java

License:Apache License

public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new StreamingUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    includeMetadata = job.getBoolean("lw.metadata", false);
    includeAnnotations = job.getBoolean("lw.annotations", false);

    // get the Behemoth annotations types and features
    // to store as SOLR fields
    // solr.f.name = BehemothType.featureName
    // e.g. solr.f.person = Person.string
    Iterator<Entry<String, String>> iterator = job.iterator();
    while (iterator.hasNext()) {
        Entry<String, String> entry = iterator.next();
        if (entry.getKey().startsWith("solr.f.") == false)
            continue;
        String fieldName = entry.getKey().substring("solr.f.".length());
        String val = entry.getValue();
        // see if a feature has been specified
        // if not we'll use '*' to indicate that we want
        // the text covered by the annotation
        HashMap<String, String> featureValMap = new HashMap<String, String>();
        int separator = val.indexOf(".");
        String featureName = "*";
        if (separator != -1)
            featureName = val.substring(separator + 1);
        featureValMap.put(featureName, fieldName);
        fieldMapping.put(entry.getValue(), featureValMap);
        LOG.debug("Adding to mapping " + entry.getValue() + " " + featureName + " " + fieldName);
    }
}

From source file:com.digitalpebble.behemoth.solr.SOLRWriter.java

License:Apache License

public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new ConcurrentUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    String paramsString = job.get("solr.params");
    if (paramsString != null) {
        params = new ModifiableSolrParams();
        String[] pars = paramsString.trim().split("\\&");
        for (String kvs : pars) {
            String[] kv = kvs.split("=");
            if (kv.length < 2) {
                LOG.warn("Invalid Solr param " + kvs + ", skipping...");
                continue;
            }
            params.add(kv[0], kv[1]);
        }
        LOG.info("Using Solr params: " + params.toString());
    }

    includeMetadata = job.getBoolean("solr.metadata", false);
    includeAnnotations = job.getBoolean("solr.annotations", false);
    useMetadataPrefix = job.getBoolean("solr.metadata.use.prefix", false);
    metadataPrefix = job.get("solr.metadata.prefix", "attr_");
    annotationPrefix = job.get("solr.annotation.prefix", "annotate_");
    useAnnotationPrefix = job.getBoolean("solr.annotation.use.prefix", false);
    populateSolrFieldMappingsFromBehemothAnnotationsTypesAndFeatures(job);
}

From source file:com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner.java

License:Apache License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link
 * org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void configure(JobConf job) {
    try {
        String parts = getPartitionFile(job);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(job) // assume in DistributedCache
                : partFile.getFileSystem(job);

        //Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, (Class<K>) Tuple.class, job);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = job.getBoolean("total.order.partitioner.natural.order", true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(Tuple.class)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    job.getInt("total.order.partitioner.max.trie.depth", 2));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

@Override
protected synchronized void submit() {
    JobConf jobConf = this.getJobConf();
    boolean isLocalHadoop = jobConf.get("mapred.job.tracker", "local").equals("local");

    // the default partitioner is {@link com.ebay.erl.mobius.core.datajoin.DataJoinKeyPartitioner}
    // which is hash based.
    //
    // If the user chooses the even partitioner, Mobius will use
    // {@link com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner}, a
    // sampling-based partitioner that attempts to balance the load
    // across the reducers.
    String partitioner = jobConf.get("mobius.partitioner", "default");

    if (!isLocalHadoop && jobConf.getNumReduceTasks() != 0 && partitioner.equals("even")) {
        // this job needs reducers, so perform sampling on the keys to
        // distribute the load across the reducers almost evenly.

        double freq = jobConf.getFloat("mobius.sampler.freq", 0.1F);
        int numSamples = jobConf.getInt("mobius.sampler.num.samples", 50000);
        int maxSplits = jobConf.getInt("mobius.sampler.max.slipts.sampled", 5);

        // log sampling parameters so that user knows.
        LOGGER.info("Sampling parameters { " + "mobius.sampler.freq:" + format.format(freq) + ", "
                + "mobius.sampler.num.samples:" + numSamples + ", " + "mobius.sampler.max.slipts.sampled:"
                + maxSplits + "}");

        InputSampler.Sampler<?, ?> sampler = new MobiusInputSampler(freq, numSamples, maxSplits);

        writePartitionFile(jobConf, sampler);

        // add to distributed cache
        try {
            URI partitionUri = new URI(TotalOrderPartitioner.getPartitionFile(jobConf) + "#_partitions");
            LOGGER.info("Adding partition uri to distributed cache:" + partitionUri.toString());

            DistributedCache.addCacheFile(partitionUri, jobConf);
            DistributedCache.createSymlink(jobConf);
            jobConf.setPartitionerClass(EvenlyPartitioner.class);

            LOGGER.info("Using " + EvenlyPartitioner.class.getCanonicalName()
                    + " to partiton the keys evenly among reducers.");
        } catch (URISyntaxException e) {
            LOGGER.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }

        // adding -XX:-UseParallelOldGC, this will automatically set -XX:-UseParallelGC
        // according to Oracle's specification
        String jvmOpts = jobConf.get("mapred.child.java.opts", "");
        if (jvmOpts.isEmpty()) {
            jvmOpts = "-XX:-UseParallelOldGC";
        } else {
            if (jvmOpts.indexOf("-XX:-UseParallelOldGC") < 0) {
                // strip any quote characters before appending the GC flag
                jvmOpts = jvmOpts.replaceAll("\"", "");
                jvmOpts = jvmOpts.concat(" -XX:-UseParallelOldGC");
            }
        }
        jobConf.set("mapred.child.java.opts", jvmOpts);

        this.setJobConf(jobConf);
    }
    LOGGER.info("Submiting job:" + jobConf.getJobName());
    super.submit();
}

From source file:com.flaptor.hounder.crawler.Nutch9Fetcher.java

License:Apache License

/**
 * Initialize the fetcher.
 */
public Nutch9Fetcher() {
    Config config = Config.getConfig("nutchfetcher.properties");
    segmentsDir = config.getString("fetchlist.dir");
    keepUrl = config.getBoolean("keep.original.url.on.redirect");
    fetcher = new Fetcher();
    Configuration conf = new Configuration();
    // conf.addDefaultResource("crawl-tool.xml");
    conf.addDefaultResource("nutch-default.xml");
    conf.addDefaultResource("nutch-site.xml");
    JobConf job = new NutchJob(conf);
    threads = job.getInt("fetcher.threads.fetch", 10);
    fetcher.setConf(conf);
}

From source file:com.github.gaoyangthu.demo.mapred.dancing.DistributedPentomino.java

License:Apache License

public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());

    // Pick up the parameters, should the user set these
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }

    // Set the parameters so that the MR tasks pick them up,
    // whether or not the user set them on the command line
    conf.setInt("pent.width", width);
    conf.setInt("pent.height", height);
    conf.setInt("pent.depth", depth);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}

From source file:com.hadoopilluminated.examples.dancing.DistributedPentomino.java

License:Apache License

public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentonimo.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}