Example usage for org.apache.hadoop.mapred JobConf getInt

List of usage examples for org.apache.hadoop.mapred JobConf getInt

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int, returning defaultValue if the property is not set.
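
Before the project examples below, here is a minimal, self-contained sketch of the call. The property name "mapreduce.my.example.retries" and the values are made up for illustration and do not come from any project shown on this page: getInt returns the configured value when the property is set, and the supplied default otherwise.

import org.apache.hadoop.mapred.JobConf;

public class GetIntExample {
    public static void main(String[] args) {
        // "mapreduce.my.example.retries" is a made-up property name used only for this sketch.
        JobConf conf = new JobConf();

        // Property not set yet: getInt falls back to the supplied default, 3.
        int retries = conf.getInt("mapreduce.my.example.retries", 3);
        System.out.println(retries); // prints 3

        // Once the property is set, getInt returns the configured value.
        conf.setInt("mapreduce.my.example.retries", 5);
        System.out.println(conf.getInt("mapreduce.my.example.retries", 3)); // prints 5
    }
}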

Usage

From source file:com.scaleoutsoftware.soss.hserver.NamedMapInputFormatMapred.java

License:Apache License

@Override
public RecordReader getRecordReader(InputSplit inputSplit, JobConf configuration, Reporter reporter)
        throws IOException {
    int mapId = configuration.getInt(inputAppIdProperty, 0);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(inputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(inputNamedMapValueSerializerProperty, null);

    if (mapId == 0 || keySerializerClass == null || valueSerializerClass == null) {
        throw new IOException("Input format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    keySerializer.setObjectClass((Class<K>) configuration.getClass(inputNamedMapKeyProperty, null));
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    valueSerializer.setObjectClass((Class<V>) configuration.getClass(inputNamedMapValueProperty, null));
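    // The serialization mode is stored in the job configuration as an enum ordinal and decoded via values().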
    int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
    SerializationMode serializationMode = SerializationMode.values()[smOrdinal];

    return new NamedMapRecordReaderMapred(inputSplit, configuration, mapId, keySerializer, valueSerializer,
            serializationMode);
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java

License:Apache License

@Override
public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf configuration, String s,
        Progressable progressable) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);
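    // Serialization and availability modes are stored as enum ordinals and decoded via values().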
    int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
    int amOrdinal = configuration.getInt(AVAILABILITY_MODE, AvailabilityMode.USE_REPLICAS.ordinal());
    SerializationMode serializationMode = SerializationMode.values()[smOrdinal];
    AvailabilityMode availabilityMode = AvailabilityMode.values()[amOrdinal];

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        throw new IOException("Input format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    keySerializer.setObjectClass((Class<K>) configuration.getClass(outputNamedMapKeyProperty, null));
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    valueSerializer.setObjectClass((Class<V>) configuration.getClass(outputNamedMapValueProperty, null));
    NamedMap<K, V> namedMap = NamedMapFactory.getMap(mapName, keySerializer, valueSerializer);
    namedMap.setAvailabilityMode(availabilityMode);
    namedMap.setSerializationMode(serializationMode);

    return new NamedMapRecordWriter<K, V>(namedMap);
}

From source file:com.scaleunlimited.helpful.tools.AnalyzeEmail.java

License:Apache License

private static JobConf getDefaultJobConf() throws IOException {
    JobClient jobClient = new JobClient(new JobConf());
    ClusterStatus status = jobClient.getClusterStatus();
    int trackers = status.getTaskTrackers();

    JobConf conf = new JobConf();
    conf.setNumMapTasks(trackers * 10);

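    // Scale the reduce-task count with the per-tasktracker reduce slot maximum (default 2).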
    conf.setNumReduceTasks((trackers * conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 2)));

    conf.setMapSpeculativeExecution(false);
    conf.setReduceSpeculativeExecution(false);
    conf.set("mapred.child.java.opts", "-server -Xmx512m -Xss128k");

    // Should match the value used for Xss above. Note no 'k' suffix for the ulimit command.
    // New support that one day will be in Hadoop.
    conf.set("mapred.child.ulimit.stack", "128");

    return conf;
}

From source file:com.TCG.Nutch_DNS.HostDbReducer.java

License:Apache License

public void configure(JobConf job) {
    retryMax = job.getInt("db.fetch.retry.max", 3);
    scfilters = new ScoringFilters(job);
    additionsAllowed = job.getBoolean(HostDb.CRAWLDB_ADDITIONS_ALLOWED, true);
    maxInterval = job.getInt("db.fetch.interval.max", 0);
    schedule = FetchScheduleFactory.getFetchSchedule(job);
    int maxLinks = job.getInt("db.update.max.inlinks", 10000);
    linked = new InlinkPriorityQueue(maxLinks);
}

From source file:com.tomslabs.grid.avro.TextTypedBytesToAvroOutputFormat.java

License:Apache License

public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {

    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);

    final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    // copy metadata from job
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroRecordWriter(writer, schema);
}

From source file:com.tuplejump.calliope.hadoop.ColumnFamilyInputFormat.java

License:Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)), null, null, null, new ReporterWrapper(reporter),
            null);

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:com.yolodata.tbana.hadoop.mapred.csv.CSVNLineInputFormat.java

License:Open Source License

public static int getNumLinesPerSplit(JobConf job) {
    return job.getInt(LINES_PER_MAP, DEFAULT_LINES_PER_MAP);
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }

        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }

}

From source file:edu.berkeley.chukwa_xtrace.TestXtrExtract.java

License:Apache License

public void testArchiving() throws Exception {

    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true);//nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);

    System.out.println("done!");
}

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapper.java

License:Apache License

public void configure(JobConf job) {
    super.configure(job);
    // Disable auto-increment of the processed-record counter. For streaming,
    // the number of processed records may be equal to or less than the number
    // of input records.
    SkipBadRecords.setAutoIncrMapperProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class)
            .getCanonicalName();
    ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

    try {
        mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
        mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
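        // Number of key fields in the map output, defaulting to a single field.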
        numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}