Example usage for org.apache.hadoop.mapred JobConf getClass

Introduction

This page collects example usages of org.apache.hadoop.mapred JobConf getClass.

Prototype

public Class<?> getClass(String name, Class<?> defaultValue)

Source Link

Document

Get the value of the name property as a Class. If the property is not set, defaultValue is returned.

Usage

From source file:com.digitalpebble.behemoth.tika.TikaMapper.java

License:Apache License

/**
 * Creates the {@link TikaProcessor} used by this mapper and hands it the job configuration.
 *
 * <p>When the {@code TikaConstants.TIKA_PROCESSOR_KEY} property is set, the class it refers to
 * is instantiated through its no-argument constructor; otherwise a plain
 * {@link TikaProcessor} is created.
 *
 * @param job the job configuration to read the processor setting from
 * @throws RuntimeException if the configured processor class cannot be instantiated
 */
@Override
public void configure(JobConf job) {

    String handlerName = job.get(TikaConstants.TIKA_PROCESSOR_KEY);
    if (handlerName != null) {
        // Look the class up under the property key itself. Using the property's value
        // (the class name) as the key would query a property that is never set, so the
        // default TikaProcessor would always be returned and the setting ignored.
        Class<?> handlerClass = job.getClass(TikaConstants.TIKA_PROCESSOR_KEY, TikaProcessor.class);
        try {
            processor = (TikaProcessor) handlerClass.newInstance();
        } catch (InstantiationException e) {
            LOG.error("Exception", e);
            // TODO: what's the best way to do this?
            throw new RuntimeException(e);
        } catch (IllegalAccessException e) {
            LOG.error("Exception", e);
            throw new RuntimeException(e);
        }
    } else {
        processor = new TikaProcessor();
    }
    processor.setConf(job);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Instantiates the converter registered for the given input.
 *
 * <p>The converter class is read from the property named
 * {@code INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input}; {@code IdenticalConverter} is used
 * when that property is not set.
 *
 * @param job   the job configuration to read from
 * @param input the input index appended to the property prefix
 * @return a new converter instance
 * @throws RuntimeException wrapping any exception raised while resolving or instantiating the class
 */
public static Converter getInputConverter(JobConf job, byte input) {
    final String converterProperty = INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input;
    try {
        Class<?> converterClass = job.getClass(converterProperty, IdenticalConverter.class);
        return (Converter) converterClass.newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Instantiates the converter registered for the given output.
 *
 * <p>The converter class is read from the property named
 * {@code OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i}; {@code IdenticalConverter} is used
 * when that property is not set.
 *
 * @param job the job configuration to read from
 * @param i   the output index appended to the property prefix
 * @return a new converter instance
 * @throws RuntimeException wrapping any exception raised while resolving or instantiating the class
 */
public static Converter getOuputConverter(JobConf job, int i) {
    final String converterProperty = OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i;
    try {
        Class<?> converterClass = job.getClass(converterProperty, IdenticalConverter.class);
        return (Converter) converterClass.newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Returns the key class configured for the given input.
 *
 * @param job   the job configuration to read from
 * @param input the input index appended to {@code INPUT_KEY_CLASS_PREFIX_CONFIG}
 * @return the configured class, or {@code MatrixIndexes.class} when the property is not set
 */
@SuppressWarnings("unchecked")
public static Class<Writable> getInputKeyClass(JobConf job, byte input) {
    final String keyClassProperty = INPUT_KEY_CLASS_PREFIX_CONFIG + input;
    Class<?> configured = job.getClass(keyClassProperty, MatrixIndexes.class);
    return (Class<Writable>) configured;
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Returns the value class configured for the given input.
 *
 * @param job   the job configuration to read from
 * @param input the input index appended to {@code INPUT_VALUE_CLASS_PREFIX_CONFIG}
 * @return the configured class, or {@code DoubleWritable.class} when the property is not set
 */
@SuppressWarnings("unchecked")
public static Class<Writable> getInputValueClass(JobConf job, byte input) {
    final String valueClassProperty = INPUT_VALUE_CLASS_PREFIX_CONFIG + input;
    Class<?> configured = job.getClass(valueClassProperty, DoubleWritable.class);
    return (Class<Writable>) configured;
}

From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License:Open Source License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default samples 1,000 values (property {@code SAMPLE_SIZE}) from up to
 * 10 locations in the input and writes the partition boundaries produced by the
 * sampler to {@code partFile} as {@code DoubleWritable}/{@code NullWritable} pairs.
 * An existing file at {@code partFile} is deleted first.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return the index of the first written boundary whose value is {@code >= 0}, or
 *         {@code partitions - 1} (number of reduce tasks minus one) if no boundary is
 *         non-negative
 * @throws IOException if something goes wrong, including an unsupported key/value class
 * @throws IllegalAccessException declared for callers; not thrown directly by this method
 * @throws InstantiationException declared for callers; not thrown directly by this method
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    // NOTE(review): the resolved class is not used below; the lookup is retained so that
    // this method's observable behavior (including lookup failures) stays unchanged.
    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);
    //get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    //one partition per reduce task
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    // With zero splits there is nothing to sample; guard the divisions so that the
    // "empty input" fallback below is reached instead of an ArithmeticException.
    long recordsPerSample = (samples > 0) ? sampleSize / samples : 0;
    int sampleStep = (samples > 0) ? splits.length / samples : 0;

    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        try {
            WritableComparable key = (WritableComparable) reader.createKey();
            Writable value = (Writable) reader.createValue();
            while (reader.next(key, value) && count < recordsPerSample) {
                Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
                inputConverter.setBlockSize(brlen, bclen);
                inputConverter.convert(key, value);
                while (inputConverter.hasNext()) {
                    Pair pair = inputConverter.next();
                    if (pair.getKey() instanceof DoubleWritable) {
                        sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                    } else if (pair.getValue() instanceof MatrixCell) {
                        sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                    } else {
                        throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                                + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                    }

                    count++;
                }
                // fresh key/value objects for the next record
                key = (WritableComparable) reader.createKey();
                value = (Writable) reader.createValue();
            }
        } finally {
            // release the underlying file handle even when sampling fails
            reader.close();
        }
        totalcount += count;
    }

    if (totalcount == 0) { //empty input files
        sampler.addValue(new DoubleWritable(0));
    }

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    //note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    try {
        NullWritable nullValue = NullWritable.get();
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            // remember the position of the first non-negative boundary
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
    } finally {
        // close the writer even if appending a boundary fails
        writer.close();
    }
    if (lessthan0) {
        index0 = partitions - 1;
    }

    return index0;
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapInputFormatMapred.java

License:Apache License

/**
 * Builds a record reader over the named map described by the job configuration.
 *
 * <p>Reads the map id, the key/value serializer classes, the key/value object classes and
 * the serialization mode ordinal from {@code configuration}, instantiates both serializers
 * and passes everything to a new {@code NamedMapRecordReaderMapred}.
 *
 * @param inputSplit    the split to read
 * @param configuration the job configuration holding the named map settings
 * @param reporter      not used by this method
 * @return a record reader for the split
 * @throws IOException if the map id is 0 or either serializer class is not configured
 */
@Override
public RecordReader getRecordReader(InputSplit inputSplit, JobConf configuration, Reporter reporter)
        throws IOException {
    final int appId = configuration.getInt(inputAppIdProperty, 0);
    final Class<CustomSerializer<K>> keySerializerType = (Class<CustomSerializer<K>>) configuration
            .getClass(inputNamedMapKeySerializerProperty, null);
    final Class<CustomSerializer<V>> valueSerializerType = (Class<CustomSerializer<V>>) configuration
            .getClass(inputNamedMapValueSerializerProperty, null);

    // All three settings are mandatory; bail out before instantiating anything.
    final boolean configured = appId != 0 && keySerializerType != null && valueSerializerType != null;
    if (!configured) {
        throw new IOException("Input format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerType, configuration);
    Class<K> keyType = (Class<K>) configuration.getClass(inputNamedMapKeyProperty, null);
    keySerializer.setObjectClass(keyType);

    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerType, configuration);
    Class<V> valueType = (Class<V>) configuration.getClass(inputNamedMapValueProperty, null);
    valueSerializer.setObjectClass(valueType);

    // The mode is stored in the configuration as an enum ordinal.
    SerializationMode serializationMode = SerializationMode.values()[configuration
            .getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal())];

    return new NamedMapRecordReaderMapred(inputSplit, configuration, appId, keySerializer, valueSerializer,
            serializationMode);
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java

License:Apache License

/**
 * Builds a record writer that writes into the named map described by the job configuration.
 *
 * <p>Reads the map name, the key/value serializer classes, the key/value object classes and
 * the serialization/availability mode ordinals from {@code configuration}, obtains the map
 * from {@code NamedMapFactory} and applies both modes to it.
 *
 * @param fileSystem    not used by this method
 * @param configuration the job configuration holding the named map settings
 * @param s             not used by this method
 * @param progressable  not used by this method
 * @return a record writer backed by the configured named map
 * @throws IOException if the map name is missing/empty or either serializer class is not configured
 */
@Override
public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf configuration, String s,
        Progressable progressable) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);
    // Both modes are stored in the configuration as enum ordinals.
    int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
    int amOrdinal = configuration.getInt(AVAILABILITY_MODE, AvailabilityMode.USE_REPLICAS.ordinal());
    SerializationMode serializationMode = SerializationMode.values()[smOrdinal];
    AvailabilityMode availabilityMode = AvailabilityMode.values()[amOrdinal];

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        // This is the output side; the message previously said "Input format".
        throw new IOException("Output format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    keySerializer.setObjectClass((Class<K>) configuration.getClass(outputNamedMapKeyProperty, null));
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    valueSerializer.setObjectClass((Class<V>) configuration.getClass(outputNamedMapValueProperty, null));
    NamedMap<K, V> namedMap = NamedMapFactory.getMap(mapName, keySerializer, valueSerializer);
    namedMap.setAvailabilityMode(availabilityMode);
    namedMap.setSerializationMode(serializationMode);

    return new NamedMapRecordWriter<K, V>(namedMap);
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java

License:Apache License

/**
 * Verifies that the job configuration names an output map and both serializer classes.
 *
 * @param fileSystem    not used by this method
 * @param configuration the job configuration holding the named map settings
 * @throws IOException if the map name is missing/empty or either serializer class is not configured
 */
@Override
public void checkOutputSpecs(FileSystem fileSystem, JobConf configuration) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        // This is the output side; the message previously said "Input format".
        throw new IOException("Output format is not configured with a valid NamedMap.");
    }
}

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapper.java

License:Apache License

/**
 * Configures this streaming mapper from the job.
 *
 * <p>After delegating to the superclass, this turns off automatic incrementing of the
 * processed-record counter, records whether skipping is enabled, decides whether keys are
 * ignored (only when the input format is {@code TextInputFormat}) and reads the map
 * input/output field separators and the number of output key fields.
 *
 * @param job the job configuration
 * @throws RuntimeException if UTF-8 is not a supported encoding
 */
public void configure(JobConf job) {
    super.configure(job);

    // For streaming, the number of processed records can be equal to or less than the
    // number of input records, so the counter must not be auto-incremented.
    SkipBadRecords.setAutoIncrMapperProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);

    Class<?> inputFormatClass = job.getClass("mapred.input.format.class", TextInputFormat.class);
    String inputFormatClassName = inputFormatClass.getCanonicalName();
    String textInputFormatName = TextInputFormat.class.getCanonicalName();
    ignoreKey = inputFormatClassName.equals(textInputFormatName);

    try {
        String outputSeparator = job.get("stream.map.output.field.separator", "\t");
        mapOutputFieldSeparator = outputSeparator.getBytes("UTF-8");

        String inputSeparator = job.get("stream.map.input.field.separator", "\t");
        mapInputFieldSeparator = inputSeparator.getBytes("UTF-8");

        numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}