Example usage for org.apache.hadoop.mapred JobConf getClass

List of usage examples for org.apache.hadoop.mapred JobConf getClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getClass.

Prototype

public Class<?> getClass(String name, Class<?> defaultValue) 

Source Link

Document

Gets the value of the name property as a Class. If no such property is set, defaultValue is returned.

Usage

From source file:com.digitalpebble.behemoth.tika.TikaMapper.java

License:Apache License

/**
 * Creates the {@link TikaProcessor} used by this mapper and passes it the job configuration.
 *
 * <p>The processor class is read from the {@code TikaConstants.TIKA_PROCESSOR_KEY} property.
 * When that property is not set, a plain {@link TikaProcessor} is created instead.
 *
 * @param job the job configuration
 * @throws RuntimeException if the configured class cannot be instantiated or accessed
 */
@Override
public void configure(JobConf job) {
    String handlerName = job.get(TikaConstants.TIKA_PROCESSOR_KEY);
    if (handlerName != null) {
        // getClass() expects the property NAME. Passing the property's value (the class name)
        // would look up a non-existent property and silently fall back to the default class.
        Class<?> handlerClass = job.getClass(TikaConstants.TIKA_PROCESSOR_KEY, TikaProcessor.class);
        try {
            processor = (TikaProcessor) handlerClass.newInstance();
        } catch (InstantiationException e) {
            LOG.error("Exception", e);
            // TODO: what's the best way to do this?
            throw new RuntimeException(e);
        } catch (IllegalAccessException e) {
            LOG.error("Exception", e);
            throw new RuntimeException(e);
        }
    } else {
        processor = new TikaProcessor();
    }
    processor.setConf(job);
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Instantiates the converter registered for the given input.
 *
 * <p>The class is looked up under {@code INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input} and
 * defaults to {@code IdenticalConverter} when the property is absent.
 *
 * @param job the job configuration to read from
 * @param input the input index appended to the property prefix
 * @return a new converter instance
 * @throws RuntimeException wrapping any failure during lookup or instantiation
 */
public static Converter getInputConverter(JobConf job, byte input) {
    final String property = INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input;
    try {
        Class<?> converterClass = job.getClass(property, IdenticalConverter.class);
        return (Converter) converterClass.newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Instantiates the converter registered for the given output.
 *
 * <p>The class is looked up under {@code OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i} and
 * defaults to {@code IdenticalConverter} when the property is absent.
 *
 * @param job the job configuration to read from
 * @param i the output index appended to the property prefix
 * @return a new converter instance
 * @throws RuntimeException wrapping any failure during lookup or instantiation
 */
public static Converter getOuputConverter(JobConf job, int i) {
    final String property = OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i;
    try {
        Class<?> converterClass = job.getClass(property, IdenticalConverter.class);
        return (Converter) converterClass.newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Returns the key class configured for the given input.
 *
 * @param job the job configuration to read from
 * @param input the input index appended to {@code INPUT_KEY_CLASS_PREFIX_CONFIG}
 * @return the configured class, or {@code MatrixIndexes.class} when the property is absent
 */
@SuppressWarnings("unchecked")
public static Class<Writable> getInputKeyClass(JobConf job, byte input) {
    final String property = INPUT_KEY_CLASS_PREFIX_CONFIG + input;
    Class<?> keyClass = job.getClass(property, MatrixIndexes.class);
    return (Class<Writable>) keyClass;
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Returns the value class configured for the given input.
 *
 * @param job the job configuration to read from
 * @param input the input index appended to {@code INPUT_VALUE_CLASS_PREFIX_CONFIG}
 * @return the configured class, or {@code DoubleWritable.class} when the property is absent
 */
@SuppressWarnings("unchecked")
public static Class<Writable> getInputValueClass(JobConf job, byte input) {
    final String property = INPUT_VALUE_CLASS_PREFIX_CONFIG + input;
    Class<?> valueClass = job.getClass(property, DoubleWritable.class);
    return (Class<Writable>) valueClass;
}

From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License:Open Source License

/**
 * Samples the job input and writes the resulting partition boundaries to {@code partFile}.
 *
 * <p>Up to {@code SAMPLE_SIZE} records (default 1000) are read, spread evenly over at most
 * 10 input splits. Every sampled key (when it is a {@code DoubleWritable}) or cell value
 * (when the value is a {@code MatrixCell}) is added to a {@code Sampler}. The split values
 * returned by {@code Sampler.createPartitions} for the configured number of reduce tasks are
 * written as {@code DoubleWritable}/{@code NullWritable} pairs. An existing file at
 * {@code partFile} is deleted first. When nothing could be sampled, a single value of 0 is
 * used so that the sampler is never empty.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return the index of the first written split value that is {@code >= 0}, or
 *         {@code partitions - 1} when no written split value is {@code >= 0}
 * @throws IOException if reading or writing fails, or a sampled pair has an unsupported
 *         key/value class
 * @throws IllegalAccessException declared in the signature; TODO confirm whether a callee
 *         still raises it
 * @throws InstantiationException declared in the signature; TODO confirm whether a callee
 *         still raises it
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    // Not read anywhere below; the lookup is retained so existing behavior of resolving
    // TARGET_KEY_CLASS at this point is unchanged.
    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);
    // block dimensions handed to the input converter
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    // one partition per reduce task
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    // With zero splits there is nothing to sample; guard the divisions so that case falls
    // through to the "empty input" handling below instead of raising ArithmeticException.
    long recordsPerSample = (samples > 0) ? sampleSize / samples : 0;
    int sampleStep = (samples > 0) ? splits.length / samples : 0;

    // take N samples from different parts of the input
    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        try {
            WritableComparable key = (WritableComparable) reader.createKey();
            Writable value = (Writable) reader.createValue();
            while (reader.next(key, value) && count < recordsPerSample) {
                Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
                inputConverter.setBlockSize(brlen, bclen);
                inputConverter.convert(key, value);
                while (inputConverter.hasNext()) {
                    Pair pair = inputConverter.next();
                    if (pair.getKey() instanceof DoubleWritable) {
                        sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                    } else if (pair.getValue() instanceof MatrixCell) {
                        sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                    } else {
                        throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                                + pair.getKey().getClass() + ":" + pair.getValue().getClass());
                    }

                    count++;
                }
                // fresh key/value objects for the next record
                key = (WritableComparable) reader.createKey();
                value = (Writable) reader.createValue();
            }
        } finally {
            // release the reader even when sampling fails part-way
            reader.close();
        }
        totalcount += count;
    }

    if (totalcount == 0) { //empty input files
        sampler.addValue(new DoubleWritable(0));
    }

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    //note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    try {
        NullWritable nullValue = NullWritable.get();
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            // remember the position of the first non-negative split value
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
    } finally {
        // close the writer even when an append fails
        writer.close();
    }
    if (lessthan0) {
        index0 = partitions - 1;
    }

    return index0;
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapInputFormatMapred.java

License:Apache License

/**
 * Builds a record reader over the NamedMap described by the job configuration.
 *
 * <p>Reads the map id and the key/value serializer classes from the configuration,
 * instantiates both serializers, tells each one which object class it handles, and resolves
 * the serialization mode from its configured ordinal.
 *
 * @param inputSplit the split to read
 * @param configuration the job configuration holding the NamedMap settings
 * @param reporter not used by this method
 * @return a {@code NamedMapRecordReaderMapred} for the split
 * @throws IOException if the map id is 0 or either serializer class is not configured
 */
@Override
public RecordReader getRecordReader(InputSplit inputSplit, JobConf configuration, Reporter reporter)
        throws IOException {
    final int mapId = configuration.getInt(inputAppIdProperty, 0);
    final Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(inputNamedMapKeySerializerProperty, null);
    final Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(inputNamedMapValueSerializerProperty, null);

    final boolean configured = mapId != 0 && keySerializerClass != null && valueSerializerClass != null;
    if (!configured) {
        throw new IOException("Input format is not configured with a valid NamedMap.");
    }

    // key side
    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    Class<K> keyClass = (Class<K>) configuration.getClass(inputNamedMapKeyProperty, null);
    keySerializer.setObjectClass(keyClass);

    // value side
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    Class<V> valueClass = (Class<V>) configuration.getClass(inputNamedMapValueProperty, null);
    valueSerializer.setObjectClass(valueClass);

    // the mode is stored in the configuration as an enum ordinal
    final int defaultOrdinal = SerializationMode.DEFAULT.ordinal();
    SerializationMode serializationMode = SerializationMode.values()[configuration.getInt(SERIALIZATION_MODE,
            defaultOrdinal)];

    return new NamedMapRecordReaderMapred(inputSplit, configuration, mapId, keySerializer, valueSerializer,
            serializationMode);
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java

License:Apache License

/**
 * Builds a record writer that stores output in the NamedMap named by the job configuration.
 *
 * <p>Reads the map name, the key/value serializer classes, and the serialization and
 * availability modes (both stored as enum ordinals) from the configuration. It then obtains
 * the map from {@code NamedMapFactory} and applies both modes to it.
 *
 * @param fileSystem not used by this method
 * @param configuration the job configuration holding the NamedMap settings
 * @param s not used by this method
 * @param progressable not used by this method
 * @return a {@code NamedMapRecordWriter} backed by the configured map
 * @throws IOException if the map name is missing or empty, or either serializer class is not
 *         configured
 */
@Override
public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf configuration, String s,
        Progressable progressable) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);
    int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal());
    int amOrdinal = configuration.getInt(AVAILABILITY_MODE, AvailabilityMode.USE_REPLICAS.ordinal());
    SerializationMode serializationMode = SerializationMode.values()[smOrdinal];
    AvailabilityMode availabilityMode = AvailabilityMode.values()[amOrdinal];

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        // This is the output side; the message previously said "Input format".
        throw new IOException("Output format is not configured with a valid NamedMap.");
    }

    CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration);
    keySerializer.setObjectClass((Class<K>) configuration.getClass(outputNamedMapKeyProperty, null));
    CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration);
    valueSerializer.setObjectClass((Class<V>) configuration.getClass(outputNamedMapValueProperty, null));
    NamedMap<K, V> namedMap = NamedMapFactory.getMap(mapName, keySerializer, valueSerializer);
    namedMap.setAvailabilityMode(availabilityMode);
    namedMap.setSerializationMode(serializationMode);

    return new NamedMapRecordWriter<K, V>(namedMap);
}

From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java

License:Apache License

/**
 * Verifies that the job configuration names a NamedMap and both serializer classes.
 *
 * @param fileSystem not used by this method
 * @param configuration the job configuration to validate
 * @throws IOException if the map name is missing or empty, or either serializer class is not
 *         configured
 */
@Override
public void checkOutputSpecs(FileSystem fileSystem, JobConf configuration) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        // This is the output side; the message previously said "Input format".
        throw new IOException("Output format is not configured with a valid NamedMap.");
    }
}

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapper.java

License:Apache License

/**
 * Reads the streaming mapper settings from the job configuration.
 *
 * <p>Sets the skip flag, decides whether input keys are ignored (they are when the configured
 * input format is {@code TextInputFormat}), and loads the map input/output field separators
 * as UTF-8 bytes together with the number of map output key fields.
 *
 * @param job the job configuration
 * @throws RuntimeException if UTF-8 is not supported on the current system
 */
public void configure(JobConf job) {
    super.configure(job);
    // Turn off automatic incrementing of the processed-record counter: in streaming, the
    // number of processed records may be equal to or less than the number of input records.
    SkipBadRecords.setAutoIncrMapperProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);

    // Keys are ignored only for the plain text input format (compared by canonical name).
    final Class<?> inputFormatClass = job.getClass("mapred.input.format.class", TextInputFormat.class);
    final String inputFormatClassName = inputFormatClass.getCanonicalName();
    final String textInputFormatName = TextInputFormat.class.getCanonicalName();
    ignoreKey = inputFormatClassName.equals(textInputFormatName);

    final String encoding = "UTF-8";
    try {
        String outputSeparator = job.get("stream.map.output.field.separator", "\t");
        mapOutputFieldSeparator = outputSeparator.getBytes(encoding);
        String inputSeparator = job.get("stream.map.input.field.separator", "\t");
        mapInputFieldSeparator = inputSeparator.getBytes(encoding);
        numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}