List of usage examples for org.apache.hadoop.mapred.JobConf.getInt
public int getInt(String name, int defaultValue)

Returns the value of the name property as an int. If no such property exists, defaultValue is returned; if the value is set but cannot be parsed as an int, a NumberFormatException is thrown.
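A minimal standalone sketch of the lookup-with-default semantics (the property names here are invented for illustration):

import org.apache.hadoop.mapred.JobConf;

JobConf conf = new JobConf();
conf.setInt("example.retry.max", 5);                // hypothetical property name
int retries  = conf.getInt("example.retry.max", 3); // 5: the stored value wins
int fallback = conf.getInt("example.not.set", 3);   // 3: property absent, default returned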
From source file: com.scaleoutsoftware.soss.hserver.NamedMapInputFormatMapred.java
License: Apache License
@Override
public RecordReader getRecordReader(InputSplit inputSplit, JobConf configuration, Reporter reporter)
        throws IOException {
    // Read the map ID set by the job driver; 0 means the property was never set.
    int mapId = configuration.getInt(inputAppIdProperty, 0);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(inputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(inputNamedMapValueSerializerProperty, null);

    if (mapId == 0 || keySerializerClass == null || valueSerializerClass == null) {
        throw new IOException("Input format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    keySerializer.setObjectClass((Class<K>) configuration.getClass(inputNamedMapKeyProperty, null));
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    valueSerializer.setObjectClass((Class<V>) configuration.getClass(inputNamedMapValueProperty, null));

    // The serialization mode is stored as an enum ordinal; fall back to the default mode.
    int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
    SerializationMode serializationMode = SerializationMode.values()[smOrdinal];

    return new NamedMapRecordReaderMapred(inputSplit, configuration, mapId, keySerializer, valueSerializer,
            serializationMode);
}
From source file: com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java
License: Apache License
@Override
public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf configuration, String s,
        Progressable progressable) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);

    // Both modes are stored as enum ordinals, each with a sensible default.
    int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
    int amOrdinal = configuration.getInt(AVAILABILITY_MODE, AvailabilityMode.USE_REPLICAS.ordinal());
    SerializationMode serializationMode = SerializationMode.values()[smOrdinal];
    AvailabilityMode availabilityMode = AvailabilityMode.values()[amOrdinal];

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        throw new IOException("Output format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    keySerializer.setObjectClass((Class<K>) configuration.getClass(outputNamedMapKeyProperty, null));
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    valueSerializer.setObjectClass((Class<V>) configuration.getClass(outputNamedMapValueProperty, null));

    NamedMap<K, V> namedMap = NamedMapFactory.getMap(mapName, keySerializer, valueSerializer);
    namedMap.setAvailabilityMode(availabilityMode);
    namedMap.setSerializationMode(serializationMode);

    return new NamedMapRecordWriter<K, V>(namedMap);
}
From source file: com.scaleunlimited.helpful.tools.AnalyzeEmail.java
License: Apache License
private static JobConf getDefaultJobConf() throws IOException {
    JobClient jobClient = new JobClient(new JobConf());
    ClusterStatus status = jobClient.getClusterStatus();
    int trackers = status.getTaskTrackers();

    JobConf conf = new JobConf();
    conf.setNumMapTasks(trackers * 10);
    // Size the reduce phase from the per-tracker slot count, defaulting to 2 slots per tracker.
    conf.setNumReduceTasks(trackers * conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 2));
    conf.setMapSpeculativeExecution(false);
    conf.setReduceSpeculativeExecution(false);
    conf.set("mapred.child.java.opts", "-server -Xmx512m -Xss128k");

    // Should match the value used for Xss above. Note no 'k' suffix for the ulimit command.
    // New support that one day will be in Hadoop.
    conf.set("mapred.child.ulimit.stack", "128");

    return conf;
}
From source file: com.TCG.Nutch_DNS.HostDbReducer.java
License: Apache License
public void configure(JobConf job) {
    // Pull numeric tuning parameters from the job configuration, each with a fallback default.
    retryMax = job.getInt("db.fetch.retry.max", 3);
    scfilters = new ScoringFilters(job);
    additionsAllowed = job.getBoolean(HostDb.CRAWLDB_ADDITIONS_ALLOWED, true);
    maxInterval = job.getInt("db.fetch.interval.max", 0);
    schedule = FetchScheduleFactory.getFetchSchedule(job);
    int maxLinks = job.getInt("db.update.max.inlinks", 10000);
    linked = new InlinkPriorityQueue(maxLinks);
}
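These Nutch-style properties are plain job configuration keys, so a driver can override the defaults before submission. A sketch using the property names from the snippet above (the surrounding driver code is illustrative):

JobConf job = new JobConf(HostDb.class);
job.setInt("db.fetch.retry.max", 5);        // overrides the default of 3
job.setInt("db.update.max.inlinks", 5000);  // bounds the inlink priority queue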
From source file: com.tomslabs.grid.avro.TextTypedBytesToAvroOutputFormat.java
License: Apache License
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);

    final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));

    if (FileOutputFormat.getCompressOutput(job)) {
        // Deflate level is an int property; fall back to the default compression level.
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    // Copy metadata from the job into the Avro file header.
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new AvroRecordWriter(writer, schema);
}
From source file: com.tuplejump.calliope.hadoop.ColumnFamilyInputFormat.java
License: Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)), null, null, null,
            new ReporterWrapper(reporter), null);

    // The maximum key size is read as an int, with a library-provided default.
    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file: com.yolodata.tbana.hadoop.mapred.csv.CSVNLineInputFormat.java
License: Open Source License
public static int getNumLinesPerSplit(JobConf job) {
    return job.getInt(LINES_PER_MAP, DEFAULT_LINES_PER_MAP);
}
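The matching setter is just setInt on the same key. A sketch, assuming LINES_PER_MAP is a public String constant of CSVNLineInputFormat:

JobConf job = new JobConf();
job.setInt(CSVNLineInputFormat.LINES_PER_MAP, 1000);      // 1000 CSV lines per split
int lines = CSVNLineInputFormat.getNumLinesPerSplit(job); // returns 1000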
From source file: de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License: Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        // Sampling ratio in percent; default to processing every record.
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);

        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);
        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // Replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);

        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file: edu.berkeley.chukwa_xtrace.TestXtrExtract.java
License: Apache License
public void testArchiving() throws Exception {
    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true); // nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    // getInt returns the value set above (1), not the supplied default (5).
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);

    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);
    Thread.sleep(1000);
    System.out.println("done!");
}
From source file: edu.stolaf.cs.wmrserver.streaming.PipeMapper.java
License: Apache License
public void configure(JobConf job) {
    super.configure(job);

    // Disable the auto-increment of the counter: for streaming, the number of
    // processed records can differ (equal or fewer) from the number of input records.
    SkipBadRecords.setAutoIncrMapperProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);

    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class)
            .getCanonicalName();
    ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

    try {
        mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
        mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
        // Number of leading fields that make up the map output key; defaults to 1.
        numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}