Example usage for org.apache.hadoop.util ReflectionUtils newInstance

List of usage examples for org.apache.hadoop.util ReflectionUtils newInstance

Introduction

On this page you can find example usage for org.apache.hadoop.util ReflectionUtils newInstance.

Prototype

@SuppressWarnings("unchecked")
public static <T> T newInstance(Class<T> theClass, Configuration conf) 

Document

Create an object for the given class and initialize it from conf
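
A minimal sketch of a direct call (not taken from the examples below): newInstance invokes the class's no-argument constructor and, when the class implements Configurable, hands it the supplied conf via setConf.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;

public class NewInstanceSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Instantiate via the no-argument constructor; setConf(conf) would be
        // called automatically if Text implemented Configurable.
        Text key = ReflectionUtils.newInstance(Text.class, conf);
        System.out.println(key.getLength()); // 0 for a freshly created Text
    }
}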

Usage

From source file:ComRoughSetApproInputSampler.java

License:Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
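
A hedged sketch of how a partition-file writer like this is typically driven, assuming the class mirrors Hadoop's InputSampler (a Sampler interface with a RandomSampler implementation); the job wiring below is illustrative and not taken from the source file.

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "total-order sort");
job.setNumReduceTasks(4);
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/_partitions"));

// Sample roughly 1% of records, up to 10000 samples, from at most 10 splits
// (assumes a RandomSampler equivalent to InputSampler.RandomSampler).
ComRoughSetApproInputSampler.RandomSampler<Text, Text> sampler =
        new ComRoughSetApproInputSampler.RandomSampler<Text, Text>(0.01, 10000, 10);

ComRoughSetApproInputSampler.writePartitionFile(job, sampler);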

From source file:Vectors.java

License:Apache License

public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fileStatus : fs.listStatus(path)) {
        if (fileStatus.getPath().getName().contains("part-")) {
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf);
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(),
                        conf);
                reader.next(key, value);
                return value.get();
            } finally {
                IOUtils.closeStream(reader);
            }
        }
    }
    return null;
}

From source file:backup.datanode.BackupFsDatasetSpiFactory.java

License:Apache License

private void setupDefaultFactory(Configuration conf) {
    if (factory == null) {
        Class<? extends Factory> defaultFactoryClass = conf.getClass(DFS_DATANODE_BACKUP_FSDATASET_FACTORY_KEY,
                FsDatasetFactory.class, Factory.class);
        factory = ReflectionUtils.newInstance(defaultFactoryClass, conf);
    }
}
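
The factory class is looked up from a configuration key with FsDatasetFactory as the fallback, so pointing the key at another implementation is just a configuration change. A sketch, where CustomFactory is a hypothetical subclass of the Factory interface used above:

Configuration conf = new Configuration();
// Hypothetical override; CustomFactory must have a no-argument constructor
// so that ReflectionUtils.newInstance can instantiate it.
conf.setClass(DFS_DATANODE_BACKUP_FSDATASET_FACTORY_KEY, CustomFactory.class, Factory.class);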

From source file:be.ugent.intec.halvade.uploader.mapreduce.MyFastqOutputFormat.java

public RecordWriter<PairedIdWritable, FastqRecord> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    OutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }

    return new FastqRecordWriter(conf, output);
}
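
Compression here follows the standard FileOutputFormat contract, so a driver could enable gzipped FASTQ output along these lines (a sketch; the job name and output path are assumptions):

Job job = Job.getInstance(new Configuration(), "fastq upload");
job.setOutputFormatClass(MyFastqOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path("/out/fastq"));

// getCompressOutput / getOutputCompressorClass in getRecordWriter read these settings.
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);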

From source file:bigsatgps.BigDataHandler.java

License:Open Source License

/**
 *
 * @param inpath
 * @throws IOException
 */
public void SequenceToImage(String inpath) throws IOException {
    FileSystem fs = FileSystem.get(confHadoop);
    Path inputFilePath = new Path(inpath);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputFilePath, confHadoop);
    Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), confHadoop);
    BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), confHadoop);
    reader.next(key, value);
    byte[] imagebytearray = value.copyBytes();
    BufferedImage bufferedseq = ImageIO.read(new ByteArrayInputStream(imagebytearray));
    String outpath = (inpath.substring(0, inpath.indexOf(".")) + "copy.tif");
    System.out.println("image was successfully retrieved and written to " + outpath);
    ImageIO.write(bufferedseq, "tiff", new File(outpath));
}

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License:Open Source License

private List<InputSplit> getInputSplits(JobContext jobContext, String inputFormatClass, Path path)
        throws ClassNotFoundException, IOException {
    Configuration conf = jobContext.getConfiguration();
    FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatClass),
            conf);

    // Set the input path for the left data set
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get(INPUT_DIR);
    conf.set(INPUT_DIR, dirStr);
    return inputFormat.getSplits(jobContext);
}

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianRecordReader.java

License:Open Source License

public CartesianRecordReader(CompositeInputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException {

    this.leftIS = split.get(0);
    this.rightIS = split.get(1);
    this.rightTaskAttemptContext = taskAttemptContext;
    this.key = new Text();
    this.value = new Text();
    Configuration conf = rightTaskAttemptContext.getConfiguration();
    try {
        // Create left record reader
        FileInputFormat leftFIF = (FileInputFormat) ReflectionUtils
                .newInstance(Class.forName(conf.get(CartesianInputFormat.LEFT_INPUT_FORMAT)), conf);

        leftRR = leftFIF.createRecordReader(leftIS, taskAttemptContext);

        // Create right record reader
        rightFIF = (FileInputFormat) ReflectionUtils
                .newInstance(Class.forName(conf.get(CartesianInputFormat.RIGHT_INPUT_FORMAT)), conf);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new IOException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
}

From source file:cascading.flow.tez.Hadoop2TezFlowProcess.java

License:Open Source License

@Override
public Object newInstance(String className) {
    if (className == null || className.isEmpty())
        return null;

    try {
        Class type = (Class) Hadoop2TezFlowProcess.class.getClassLoader().loadClass(className.toString());

        return ReflectionUtils.newInstance(type, configuration);
    } catch (ClassNotFoundException exception) {
        throw new CascadingException("unable to load class: " + className.toString(), exception);
    }
}

From source file:cascading.pipe.cogroup.CoGroupClosure.java

License:Open Source License

public CompressionCodec getCompressionCodec(FlowProcess flowProcess) {
    String compress = (String) flowProcess.getProperty(SPILL_COMPRESS);

    if (compress != null && !Boolean.parseBoolean(compress))
        return null;

    String codecs = (String) flowProcess.getProperty(SPILL_CODECS);

    if (codecs == null || codecs.length() == 0)
        codecs = defaultCodecs;

    Class<? extends CompressionCodec> codecClass = null;

    for (String codec : codecs.split("[,\\s]+")) {
        try {
            LOG.info("attempting to load codec: " + codec);
            codecClass = Thread.currentThread().getContextClassLoader().loadClass(codec)
                    .asSubclass(CompressionCodec.class);

            if (codecClass != null) {
                LOG.info("found codec: " + codec);

                break;
            }
        } catch (ClassNotFoundException exception) {
            // do nothing
        }
    }

    if (codecClass == null) {
        LOG.warn("codecs set, but unable to load any: " + codecs);
        return null;
    }

    return ReflectionUtils.newInstance(codecClass, ((HadoopFlowProcess) flowProcess).getJobConf());
}

From source file:cascading.tap.hadoop.io.MultiInputSplit.java

License:Open Source License

public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    if (LOG.isDebugEnabled()) {
        LOG.debug("current split config diff:");
        for (Map.Entry<String, String> entry : config.entrySet())
            LOG.debug("key: {}, value: {}", entry.getKey(), entry.getValue());
    }

    JobConf currentConf = HadoopUtil.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        throw new IOException("split class " + splitType + " not found");
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set(CASCADING_SOURCE_PATH, path.toString());

            LOG.info("current split input path: {}", path);
        }
    }
}