List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int, returning defaultValue if the property is not set.
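A minimal sketch of that fallback behaviour (the property names below are hypothetical, used only for illustration):

import org.apache.hadoop.mapred.JobConf;

public class GetIntDemo {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setInt("example.retry.count", 7);                // hypothetical property name

        int retries = job.getInt("example.retry.count", 4);  // returns 7: the stored value wins
        int missing = job.getInt("example.not.set", 4);      // returns 4: key absent, default used
        System.out.println(retries + " " + missing);
    }
}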
From source file:org.commoncrawl.hadoop.io.JetS3tARCSource.java
License:Open Source License
/** @inheritDoc */
@Override
protected void configureImpl(JobConf job) {
    try {
        // Pull credentials from the configuration
        String awsAccessKeyId = getProperty(job, P_AWS_ACCESS_KEY_ID);
        String awsSecretAccessKey = getProperty(job, P_AWS_SECRET_ACCESS_KEY);
        String bucketName = getProperty(job, P_BUCKET_NAME);

        // Instantiate JetS3t classes
        AWSCredentials awsCredentials = new AWSCredentials(awsAccessKeyId, awsSecretAccessKey);
        service = new RestS3Service(awsCredentials);
        bucket = new S3Bucket(bucketName);

        // Retry limit; defaults to 4 when P_MAX_TRIES is not set in the job configuration
        maxTries = job.getInt(P_MAX_TRIES, 4);
    } catch (S3ServiceException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.commoncrawl.hadoop.io.S3GetMetdataJob.java
License:Open Source License
public void configure(JobConf job) {
    _attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    // default to 4 attempts when mapred.max.tracker.failures is unset
    _maxAttemptsPerTask = job.getInt("mapred.max.tracker.failures", 4);
    _splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Spit Details Unknown");
}
From source file:org.commoncrawl.hadoop.template.SampleHadoopJob.java
License:Open Source License
/** overloaded to initialize class variables from job config **/
@Override
public void configure(JobConf job) {
    attemptID = TaskAttemptID.forName(job.get("mapred.task.id"));
    maxAttemptTaskId = job.getInt("mapred.max.tracker.failures", 4) - 1;
    splitDetails = job.get(ARCSplitReader.SPLIT_DETAILS, "Spit Details Unknown");
    pattern = Pattern.compile(job.get("mapred.mapper.regex"));
    group = job.getInt("mapred.mapper.regex.group", 0);
}
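The configure() methods above only read these properties; a hedged sketch of the matching driver-side setup (the driver class and regex value are hypothetical, only the property keys are taken from the example above):

import org.apache.hadoop.mapred.JobConf;

public class SampleHadoopJobDriver {
    public static JobConf buildConf() {
        // Hypothetical driver-side setup; not part of the SampleHadoopJob source.
        JobConf job = new JobConf(SampleHadoopJobDriver.class);
        job.set("mapred.mapper.regex", "href=\"([^\"]+)\"");
        job.setInt("mapred.mapper.regex.group", 1);  // configure() reads this via getInt(..., 0)
        return job;
    }
}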
From source file:org.commoncrawl.mapred.pipelineV3.crawllistgen.GenBundlesStep.java
License:Open Source License
public void configure(JobConf job) {
    _jobConf = job;
    crawlerCount = job.getInt(CrawlEnvironment.PROPERTY_NUM_CRAWLERS, CrawlEnvironment.CRAWLERS.length);
    partitionNumber = job.getInt("mapred.task.partition", -1);
    try {
        FileSystem fs = FileSystem.get(job);
        Path workPath = FileOutputFormat.getOutputPath(job);
        debugURLStream = fs.create(new Path(workPath, "debugURLS-" + NUMBER_FORMAT.format(partitionNumber)));
        urlDebugURLWriter = new OutputStreamWriter(debugURLStream, Charset.forName("UTF-8"));
        _emittedURLSFilter = new URLFPBloomFilter(NUM_ELEMENTS, NUM_HASH_FUNCTIONS, NUM_BITS);
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new RuntimeException(e);
    }
}
From source file:org.commoncrawl.mapred.segmenter.BundleKeyPartitioner.java
License:Open Source License
public void configure(JobConf job) {
    // get buckets per crawler ...
    bucketsPerCrawler = job.getInt(CrawlEnvironment.PROPERTY_NUM_BUCKETS_PER_CRAWLER, 8);
}
From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);
        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);
        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
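One plausible use of the percentage read via getInt("dkpro.map.samplingratio", 100) is a simple probabilistic gate in map(). The class below is a hypothetical illustration, not part of UIMAMapReduceBase:

import java.util.Random;

public class SamplingGate {
    private final int samplingRatio;   // percentage of records to keep, 0..100
    private final Random random = new Random();

    public SamplingGate(int samplingRatio) {
        this.samplingRatio = samplingRatio;
    }

    public boolean accept() {
        // With the default of 100, every record passes.
        return random.nextInt(100) < samplingRatio;
    }
}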
From source file:org.dkpro.bigdata.io.hadoop.GenericMultiLineRecordReader.java
License:Apache License
public GenericMultiLineRecordReader(FileSplit split, JobConf jobConf, Reporter reporter) throws IOException {
    lineReader = new LineRecordReader(jobConf, split);
    this.split = split;
    maxNumLinesPerSplit = jobConf.getInt("dkpro.input.maxlinesperrecord", 1);
}
From source file:org.hxx.hadoop.URLCountPartitioner.java
License:Apache License
public void configure(JobConf job) {
    seed = job.getInt("partition.url.seed", 0);
    normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_PARTITION);
    topn = job.getLong(Generator.GENERATOR_TOP_N, 100000);
    hostn = job.getInt(Generator.GENERATOR_MAX_COUNT, -1);
    // cntStr = job.get(GeneratorHbase.GENERATL_CNT);// ?
    // int reduceNum = job.getInt(GeneratorHbase.GENERATL_REDUCENUM, 1);
    // initPart(reduceNum);
}
From source file:org.hypertable.hadoop.mapred.RowInputFormat.java
License:Open Source License
public RecordReader<BytesWritable, Row> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    try {
        TableSplit ts = (TableSplit) split;
        if (m_namespace == null) {
            m_namespace = job.get(NAMESPACE);
        }
        if (m_tablename == null) {
            m_tablename = job.get(TABLE);
        }
        ScanSpec scan_spec = ts.createScanSpec(m_base_spec);
        if (m_client == null) {
            int framesize = job.getInt(THRIFT_FRAMESIZE, 0);
            if (framesize == 0)
                framesize = job.getInt(THRIFT_FRAMESIZE2, 0);
            if (framesize != 0)
                m_client = ThriftClient.create("localhost", 15867, 1600000, true, framesize);
            else
                m_client = ThriftClient.create("localhost", 15867);
        }
        return new HypertableRecordReader(m_client, m_namespace, m_tablename, scan_spec);
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    }
}
From source file:org.hypertable.hadoop.mapred.RowInputFormat.java
License:Open Source License
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    long ns = 0;
    try {
        if (m_client == null) {
            int framesize = job.getInt(THRIFT_FRAMESIZE, 0);
            if (framesize == 0)
                framesize = job.getInt(THRIFT_FRAMESIZE2, 0);
            if (framesize != 0)
                m_client = ThriftClient.create("localhost", 15867, 1600000, true, framesize);
            else
                m_client = ThriftClient.create("localhost", 15867);
        }
        String namespace, tablename;
        if (m_namespace == null)
            namespace = job.get(NAMESPACE);
        else
            namespace = m_namespace;
        if (m_tablename == null)
            tablename = job.get(TABLE);
        else
            tablename = m_tablename;
        ns = m_client.open_namespace(namespace);
        List<org.hypertable.thriftgen.TableSplit> tsplits = m_client.get_table_splits(ns, tablename);
        InputSplit[] splits = new InputSplit[tsplits.size()];
        try {
            int pos = 0;
            for (final org.hypertable.thriftgen.TableSplit ts : tsplits) {
                TableSplit split = new TableSplit(tablename.getBytes("UTF-8"), ts.start_row, ts.end_row,
                        ts.ip_address);
                splits[pos++] = (InputSplit) split;
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            System.exit(-1);
        }
        return splits;
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } catch (TException e) {
        e.printStackTrace();
        throw new IOException(e.getMessage());
    } finally {
        if (ns != 0) {
            try {
                m_client.close_namespace(ns);
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException(e.getMessage());
            }
        }
    }
}
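Both Hypertable methods show the same pattern: try a current key with getInt, fall back to a legacy key, and treat 0 as "not configured". A standalone sketch of that lookup chain (the property names below are hypothetical; the real keys live behind the THRIFT_FRAMESIZE constants in RowInputFormat and are not shown on this page):

import org.apache.hadoop.mapred.JobConf;

public class FramesizeLookup {
    static final String PRIMARY_KEY = "example.thriftbroker.framesize";  // hypothetical
    static final String LEGACY_KEY = "example.thriftclient.framesize";   // hypothetical

    static int resolveFramesize(JobConf job) {
        // Try the current key first, then the legacy one; 0 means "not configured".
        int framesize = job.getInt(PRIMARY_KEY, 0);
        if (framesize == 0) {
            framesize = job.getInt(LEGACY_KEY, 0);
        }
        return framesize;
    }
}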