Example usage for org.apache.hadoop.mapred JobConf getInt

Introduction

This page collects example usages of the org.apache.hadoop.mapred.JobConf.getInt(String, int) method.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the supplied defaultValue is returned.
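
Before the examples, a minimal, self-contained sketch of the method's contract; the class name GetIntDemo and the my.example.* property names are illustrative only, not taken from the sources below.

import org.apache.hadoop.mapred.JobConf;

public class GetIntDemo {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setInt("my.example.retries", 5);                // store an int property
        int retries = job.getInt("my.example.retries", 3);  // property is set: returns 5
        int fallback = job.getInt("my.example.unset", 3);   // property missing: returns the default, 3
        System.out.println(retries + " " + fallback);
    }
}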

Usage

From source file:org.commoncrawl.hadoop.io.JetS3tARCSource.java

License:Open Source License

/**
 * @inheritDoc
 */
@Override
protected void configureImpl(JobConf job) {
    try {

        // Pull credentials from the configuration
        String awsAccessKeyId = getProperty(job, P_AWS_ACCESS_KEY_ID);
        String awsSecretAccessKey = getProperty(job, P_AWS_SECRET_ACCESS_KEY);
        String bucketName = getProperty(job, P_BUCKET_NAME);

        // Instantiate JetS3t classes
        AWSCredentials awsCredentials = new AWSCredentials(awsAccessKeyId, awsSecretAccessKey);
        service = new RestS3Service(awsCredentials);
        bucket = new S3Bucket(bucketName);

        maxTries = job.getInt(P_MAX_TRIES, 4);

    } catch (S3ServiceException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.commoncrawl.hadoop.io.S3GetMetdataJob.java

License:Open Source License

public void configure(JobConf job) {

    _attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
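    // cap on task attempts, read from "mapred.max.tracker.failures" (default 4)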
    _maxAttemptsPerTask = job.getInt("mapred.max.tracker.failures", 4);
    _splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Split Details Unknown");
}

From source file:org.commoncrawl.hadoop.template.SampleHadoopJob.java

License:Open Source License

/** overloaded to initialize class variables from job config **/
@Override
public void configure(JobConf job) {

    attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    maxAttemptTaskId = job.getInt("mapred.max.tracker.failures", 4) - 1;
    splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Split Details Unknown");
    pattern = Pattern.compile(job.get("mapred.mapper.regex"));
    group = job.getInt("mapred.mapper.regex.group", 0);

}

From source file:org.commoncrawl.mapred.pipelineV3.crawllistgen.GenBundlesStep.java

License:Open Source License

public void configure(JobConf job) {

    _jobConf = job;

    crawlerCount = job.getInt(CrawlEnvironment.PROPERTY_NUM_CRAWLERS, CrawlEnvironment.CRAWLERS.length);

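    // this task's partition number; -1 if the property is not set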
    partitionNumber = job.getInt("mapred.task.partition", -1);

    try {
        FileSystem fs = FileSystem.get(job);
        Path workPath = FileOutputFormat.getOutputPath(job);
        debugURLStream = fs.create(new Path(workPath, "debugURLS-" + NUMBER_FORMAT.format(partitionNumber)));
        urlDebugURLWriter = new OutputStreamWriter(debugURLStream, Charset.forName("UTF-8"));
        _emittedURLSFilter = new URLFPBloomFilter(NUM_ELEMENTS, NUM_HASH_FUNCTIONS, NUM_BITS);
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new RuntimeException(e);
    }

}

From source file:org.commoncrawl.mapred.segmenter.BundleKeyPartitioner.java

License:Open Source License

public void configure(JobConf job) {
    // get buckets per crawler ... 
    bucketsPerCrawler = job.getInt(CrawlEnvironment.PROPERTY_NUM_BUCKETS_PER_CRAWLER, 8);
}

From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }

        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }

}

From source file:org.dkpro.bigdata.io.hadoop.GenericMultiLineRecordReader.java

License:Apache License

public GenericMultiLineRecordReader(FileSplit split, JobConf jobConf, Reporter reporter) throws IOException {
    lineReader = new LineRecordReader(jobConf, split);
    this.split = split;
    maxNumLinesPerSplit = jobConf.getInt("dkpro.input.maxlinesperrecord", 1);
}

From source file:org.hxx.hadoop.URLCountPartitioner.java

License:Apache License

public void configure(JobConf job) {
    seed = job.getInt("partition.url.seed", 0);
    normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_PARTITION);

    topn = job.getLong(Generator.GENERATOR_TOP_N, 100000);
    hostn = job.getInt(Generator.GENERATOR_MAX_COUNT, -1);
    // cntStr = job.get(GeneratorHbase.GENERATL_CNT);// ?
    // int reduceNum = job.getInt(GeneratorHbase.GENERATL_REDUCENUM, 1);
    // initPart(reduceNum);
}

From source file:org.hypertable.hadoop.mapred.RowInputFormat.java

License:Open Source License

public RecordReader<BytesWritable, Row> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {

    try {
        TableSplit ts = (TableSplit) split;
        if (m_namespace == null) {
            m_namespace = job.get(NAMESPACE);
        }
        if (m_tablename == null) {
            m_tablename = job.get(TABLE);
        }
        ScanSpec scan_spec = ts.createScanSpec(m_base_spec);

        if (m_client == null) {
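            // Thrift frame size: try THRIFT_FRAMESIZE first, then THRIFT_FRAMESIZE2; 0 means not configured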
            int framesize = job.getInt(THRIFT_FRAMESIZE, 0);
            if (framesize == 0)
                framesize = job.getInt(THRIFT_FRAMESIZE2, 0);
            if (framesize != 0)
                m_client = ThriftClient.create("localhost", 15867, 1600000, true, framesize);
            else
                m_client = ThriftClient.create("localhost", 15867);
        }
        return new HypertableRecordReader(m_client, m_namespace, m_tablename, scan_spec);
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    }
}

From source file:org.hypertable.hadoop.mapred.RowInputFormat.java

License:Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    long ns = 0;
    try {
        if (m_client == null) {
            int framesize = job.getInt(THRIFT_FRAMESIZE, 0);
            if (framesize == 0)
                framesize = job.getInt(THRIFT_FRAMESIZE2, 0);
            if (framesize != 0)
                m_client = ThriftClient.create("localhost", 15867, 1600000, true, framesize);
            else
                m_client = ThriftClient.create("localhost", 15867);
        }

        String namespace, tablename;
        if (m_namespace == null)
            namespace = job.get(NAMESPACE);
        else
            namespace = m_namespace;
        if (m_tablename == null)
            tablename = job.get(TABLE);
        else
            tablename = m_tablename;

        ns = m_client.open_namespace(namespace);
        List<org.hypertable.thriftgen.TableSplit> tsplits = m_client.get_table_splits(ns, tablename);
        InputSplit[] splits = new InputSplit[tsplits.size()];

        try {
            int pos = 0;
            for (final org.hypertable.thriftgen.TableSplit ts : tsplits) {
                TableSplit split = new TableSplit(tablename.getBytes("UTF-8"), ts.start_row, ts.end_row,
                        ts.ip_address);
                splits[pos++] = (InputSplit) split;
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            System.exit(-1);
        }

        return splits;
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } finally {
        if (ns != 0) {
            try {
                m_client.close_namespace(ns);
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException(e.getMessage());
            }
        }
    }
}