List of usage examples for org.apache.hadoop.mapred JobConf getClass
public Class<?> getClass(String name, Class<?> defaultValue)
Gets the value of the name property as a Class; if no such property is set, defaultValue is returned.
From source file:com.digitalpebble.behemoth.tika.TikaMapper.java
License:Apache License
@Override public void configure(JobConf job) { String handlerName = job.get(TikaConstants.TIKA_PROCESSOR_KEY); if (handlerName != null) { Class handlerClass = job.getClass(handlerName, TikaProcessor.class); try {// w ww. ja v a 2s. c o m processor = (TikaProcessor) handlerClass.newInstance(); } catch (InstantiationException e) { LOG.error("Exception", e); // TODO: what's the best way to do this? throw new RuntimeException(e); } catch (IllegalAccessException e) { LOG.error("Exception", e); throw new RuntimeException(e); } } else { processor = new TikaProcessor(); } processor.setConf(job); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static Converter getInputConverter(JobConf job, byte input) { Converter inputConverter;// www .j a v a 2s. c o m try { inputConverter = (Converter) job .getClass(INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input, IdenticalConverter.class).newInstance(); } catch (Exception e) { throw new RuntimeException(e); } return inputConverter; }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static Converter getOuputConverter(JobConf job, int i) { Converter outputConverter;//from www .ja va 2 s .c om try { outputConverter = (Converter) job .getClass(OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i, IdenticalConverter.class).newInstance(); } catch (Exception e) { throw new RuntimeException(e); } return outputConverter; }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Returns the key class configured for one input of the job.
 *
 * <p>Reads the property {@code INPUT_KEY_CLASS_PREFIX_CONFIG + input} and falls
 * back to {@code MatrixIndexes} when it is not set.
 *
 * @param job the job configuration to read from
 * @param input the input index appended to the property prefix
 * @return the configured key class
 */
@SuppressWarnings("unchecked")
public static Class<Writable> getInputKeyClass(JobConf job, byte input) {
    String property = INPUT_KEY_CLASS_PREFIX_CONFIG + input;
    Class<?> keyClass = job.getClass(property, MatrixIndexes.class);
    return (Class<Writable>) keyClass;
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Returns the value class configured for one input of the job.
 *
 * <p>Reads the property {@code INPUT_VALUE_CLASS_PREFIX_CONFIG + input} and falls
 * back to {@code DoubleWritable} when it is not set.
 *
 * @param job the job configuration to read from
 * @param input the input index appended to the property prefix
 * @return the configured value class
 */
@SuppressWarnings("unchecked")
public static Class<Writable> getInputValueClass(JobConf job, byte input) {
    String property = INPUT_VALUE_CLASS_PREFIX_CONFIG + input;
    Class<?> valueClass = job.getClass(property, DoubleWritable.class);
    return (Class<Writable>) valueClass;
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java
License:Open Source License
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * @param conf the job to sample// w w w . ja va2 s.co m * @param partFile where to write the output file to * @throws IOException if something goes wrong * @throws IllegalAccessException * @throws InstantiationException */ @SuppressWarnings({ "unchecked", "unused", "deprecation" }) public static int writePartitionFile(JobConf conf, Path partFile) throws IOException, InstantiationException, IllegalAccessException { SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat(); Sampler sampler = new Sampler(); Class<? extends WritableComparable> targetKeyClass; targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS, WritableComparable.class); //get input converter information int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0); int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0); //indicate whether the matrix value in this mapper is a matrix cell or a matrix block int partitions = conf.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 1000); InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks()); int samples = Math.min(10, splits.length); long recordsPerSample = sampleSize / samples; int sampleStep = splits.length / samples; // take N samples from different parts of the input int totalcount = 0; for (int i = 0; i < samples; ++i) { SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat .getRecordReader(splits[sampleStep * i], conf, null); int count = 0; WritableComparable key = (WritableComparable) reader.createKey(); Writable value = (Writable) reader.createValue(); while (reader.next(key, value) && count < recordsPerSample) { Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0); inputConverter.setBlockSize(brlen, 
bclen); inputConverter.convert(key, value); while (inputConverter.hasNext()) { Pair pair = inputConverter.next(); if (pair.getKey() instanceof DoubleWritable) { sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get())); } else if (pair.getValue() instanceof MatrixCell) { sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue())); } else throw new IOException("SamplingSortMRInputFormat unsupported key/value class: " + pair.getKey().getClass() + ":" + pair.getValue().getClass()); count++; } key = (WritableComparable) reader.createKey(); value = (Writable) reader.createValue(); } totalcount += count; } if (totalcount == 0) //empty input files sampler.addValue(new DoubleWritable(0)); FileSystem outFs = partFile.getFileSystem(conf); if (outFs.exists(partFile)) { outFs.delete(partFile, false); } //note: key value always double/null as expected by partitioner SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class); NullWritable nullValue = NullWritable.get(); int index0 = -1, i = 0; boolean lessthan0 = true; for (WritableComparable splitValue : sampler.createPartitions(partitions)) { writer.append(splitValue, nullValue); if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) { index0 = i; lessthan0 = false; } i++; } if (lessthan0) index0 = partitions - 1; writer.close(); return index0; }
From source file:com.scaleoutsoftware.soss.hserver.NamedMapInputFormatMapred.java
License:Apache License
@Override public RecordReader getRecordReader(InputSplit inputSplit, JobConf configuration, Reporter reporter) throws IOException { int mapId = configuration.getInt(inputAppIdProperty, 0); Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration .getClass(inputNamedMapKeySerializerProperty, null); Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration .getClass(inputNamedMapValueSerializerProperty, null); if (mapId == 0 || keySerializerClass == null || valueSerializerClass == null) { throw new IOException("Input format is not configured with a valid NamedMap."); }//ww w . j a v a 2 s . c o m CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration); keySerializer.setObjectClass((Class<K>) configuration.getClass(inputNamedMapKeyProperty, null)); CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration); valueSerializer.setObjectClass((Class<V>) configuration.getClass(inputNamedMapValueProperty, null)); int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal()); SerializationMode serializationMode = SerializationMode.values()[smOrdinal]; return new NamedMapRecordReaderMapred(inputSplit, configuration, mapId, keySerializer, valueSerializer, serializationMode); }
From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java
License:Apache License
@Override public RecordWriter getRecordWriter(FileSystem fileSystem, JobConf configuration, String s, Progressable progressable) throws IOException { String mapName = configuration.get(outputNamedMapProperty); Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration .getClass(outputNamedMapKeySerializerProperty, null); Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration .getClass(outputNamedMapValueSerializerProperty, null); int smOrdinal = configuration.getInt(SERIALIZATION_MODE, SerializationMode.DEFAULT.ordinal()); int amOrdinal = configuration.getInt(AVAILABILITY_MODE, AvailabilityMode.USE_REPLICAS.ordinal()); SerializationMode serializationMode = SerializationMode.values()[smOrdinal]; AvailabilityMode availabilityMode = AvailabilityMode.values()[amOrdinal]; if (mapName == null || mapName.length() == 0 || keySerializerClass == null || valueSerializerClass == null) { throw new IOException("Input format is not configured with a valid NamedMap."); }//from ww w .j av a 2s.c o m CustomSerializer<K> keySerializer = ReflectionUtils.newInstance(keySerializerClass, configuration); keySerializer.setObjectClass((Class<K>) configuration.getClass(outputNamedMapKeyProperty, null)); CustomSerializer<V> valueSerializer = ReflectionUtils.newInstance(valueSerializerClass, configuration); valueSerializer.setObjectClass((Class<V>) configuration.getClass(outputNamedMapValueProperty, null)); NamedMap<K, V> namedMap = NamedMapFactory.getMap(mapName, keySerializer, valueSerializer); namedMap.setAvailabilityMode(availabilityMode); namedMap.setSerializationMode(serializationMode); return new NamedMapRecordWriter<K, V>(namedMap); }
From source file:com.scaleoutsoftware.soss.hserver.NamedMapOutputFormatMapred.java
License:Apache License
/**
 * Verifies that the job configuration names an output NamedMap and both of its
 * serializer classes.
 *
 * @param fileSystem not used by this method
 * @param configuration the job configuration holding the NamedMap settings
 * @throws IOException if the map name is missing or empty, or either serializer
 *         class is not configured
 */
@Override
public void checkOutputSpecs(FileSystem fileSystem, JobConf configuration) throws IOException {
    String mapName = configuration.get(outputNamedMapProperty);
    Class<CustomSerializer<K>> keySerializerClass = (Class<CustomSerializer<K>>) configuration
            .getClass(outputNamedMapKeySerializerProperty, null);
    Class<CustomSerializer<V>> valueSerializerClass = (Class<CustomSerializer<V>>) configuration
            .getClass(outputNamedMapValueSerializerProperty, null);

    if (mapName == null || mapName.length() == 0 || keySerializerClass == null
            || valueSerializerClass == null) {
        // The message used to say "Input format", which pointed users at the
        // wrong side of the job when the output settings were missing.
        throw new IOException("Output format is not configured with a valid NamedMap.");
    }
}
From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapper.java
License:Apache License
public void configure(JobConf job) { super.configure(job); //disable the auto increment of the counter. For streaming, no of //processed records could be different(equal or less) than the no of //records input. SkipBadRecords.setAutoIncrMapperProcCount(job, false); skipping = job.getBoolean("mapred.skip.on", false); String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class) .getCanonicalName();/* w w w. j av a 2 s. c om*/ ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()); try { mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8"); mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8"); numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1); } catch (UnsupportedEncodingException e) { throw new RuntimeException("The current system does not support UTF-8 encoding!", e); } }