org.mrgeo.format.AutoGeometryInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.mrgeo.format.AutoGeometryInputFormat.java

Source

/*
 * Copyright 2009-2015 DigitalGlobe, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */

package org.mrgeo.format;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.mrgeo.mapreduce.GeometryWritable;
import org.mrgeo.hdfs.utils.HadoopFileUtils;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.List;

/**
 * Automatically determines a file type based on the extension and opens an
 * appropriate input format.
 *
 * <p>
 * For every configured input path, a concrete input format is obtained from
 * {@link GeometryInputFormatFactory} and asked for its splits. Each of those
 * splits is wrapped in an {@link AutoInputSplit} that remembers which format
 * produced it, so that {@link AutoRecordReader} can later delegate to the
 * matching record reader.
 */
public class AutoGeometryInputFormat extends InputFormat<LongWritable, GeometryWritable> implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Configuration key read as the total number of splits to distribute across the inputs. */
    private static final String MAP_TASKS_KEY = "mapred.map.tasks";

    /** Configuration key temporarily pointed at a single path while a delegate computes its splits. */
    private static final String INPUT_DIR_KEY = "mapred.input.dir";

    /**
     * Creates an {@link AutoRecordReader} and initializes it with the given
     * split and context before returning it.
     *
     * @param split
     *            the split to read; must be an {@link AutoInputSplit}
     * @param context
     *            the task context handed on to the delegate reader
     * @return an already initialized record reader
     */
    @Override
    public RecordReader<LongWritable, GeometryWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        RecordReader<LongWritable, GeometryWritable> result = new AutoRecordReader();
        result.initialize(split, context);
        return result;
    }

    /**
     * Computes the splits of every input path using the input format chosen
     * for that path, wrapping each one in an {@link AutoInputSplit}.
     *
     * <p>
     * The configured split count ({@code mapred.map.tasks}, default 2) is
     * shared out between the paths in proportion to their size, with at least
     * one split requested per path. While a delegate format runs, the job
     * configuration is temporarily rewritten to describe only that path; the
     * original values are restored before this method returns, including when
     * a delegate throws.
     *
     * @param context
     *            the job whose input paths and configuration are used
     * @return the wrapped splits of all input paths, in path order
     */
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        Path[] paths = FileInputFormat.getInputPaths(context);
        List<InputSplit> result = new LinkedList<InputSplit>();

        Configuration conf = context.getConfiguration();
        long totalSize = 0;
        for (Path p : paths) {
            totalSize += HadoopFileUtils.getPathSize(conf, p);
        }

        String inputs = conf.get(INPUT_DIR_KEY);
        int totalSplits = conf.getInt(MAP_TASKS_KEY, 2);
        try {
            for (Path p : paths) {
                // Widen to double BEFORE dividing; an integral division would truncate
                // the share of every path smaller than the total down to zero. A zero
                // total (only empty inputs) falls back to the one-split minimum below.
                double portion = totalSize > 0
                        ? (double) HadoopFileUtils.getPathSize(conf, p) / totalSize
                        : 0.0;
                int splits = (int) Math.max(1, Math.round(portion * totalSplits));

                // The delegate reads its path and split hint from the configuration,
                // so narrow both to the current path for the duration of the call.
                conf.setInt(MAP_TASKS_KEY, splits);
                conf.set(INPUT_DIR_KEY, p.toString());
                InputFormat<LongWritable, GeometryWritable> f = GeometryInputFormatFactory.getInstance()
                        .createReader(p);
                for (InputSplit s : f.getSplits(context)) {
                    result.add(new AutoInputSplit(s, f));
                }
            }
        } finally {
            // Always undo the temporary overrides, even if a delegate failed.
            conf.setInt(MAP_TASKS_KEY, totalSplits);
            if (inputs != null) {
                // A null value cannot be written back; nothing was configured under
                // this key originally, so there is nothing to restore.
                conf.set(INPUT_DIR_KEY, inputs);
            }
        }

        return result;
    }

    /**
     * Record reader that forwards every call to the reader created by the
     * input format stored in its {@link AutoInputSplit}.
     */
    static public class AutoRecordReader extends RecordReader<LongWritable, GeometryWritable> {
        AutoInputSplit split;
        RecordReader<LongWritable, GeometryWritable> reader = null;

        AutoRecordReader() {
        }

        /**
         * Closes the delegate reader, if one has been created.
         */
        @Override
        public void close() throws IOException {
            // initialize() may never have run (or may have failed before assigning).
            if (reader != null) {
                reader.close();
            }
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return reader.getProgress();
        }

        /**
         * Creates and initializes the delegate reader for the wrapped split.
         *
         * @param split
         *            must be an {@link AutoInputSplit}; a {@link ClassCastException}
         *            is thrown otherwise
         * @param context
         *            passed through to the delegate reader
         */
        @SuppressWarnings("hiding")
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            // createRecordReader() already initializes the reader it returns, so a
            // caller that initializes it again would otherwise leak the first delegate.
            if (reader != null) {
                reader.close();
                reader = null;
            }
            this.split = (AutoInputSplit) split;
            InputSplit wrapped = this.split.getSplit();
            reader = this.split.format.createRecordReader(wrapped, context);
            reader.initialize(wrapped, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return reader.nextKeyValue();
        }

        @Override
        public LongWritable getCurrentKey() throws IOException, InterruptedException {
            return reader.getCurrentKey();
        }

        @Override
        public GeometryWritable getCurrentValue() throws IOException, InterruptedException {
            return reader.getCurrentValue();
        }

    }

    /**
     * A split paired with the input format that produced it.
     *
     * <p>
     * Both fields are transient and are written and read by the custom
     * {@code writeObject}/{@code readObject} methods: the format is stored as
     * its class and re-created through its no-argument constructor, and the
     * split is stored either as a serialized object or, for a
     * {@link FileSplit} that is not serializable, field by field.
     *
     * <p>
     * NOTE(review): this is a non-static inner class, so every instance carries
     * an implicit reference to its enclosing {@link AutoGeometryInputFormat} -
     * presumably the reason the outer class is {@link Serializable}.
     */
    public class AutoInputSplit extends InputSplit implements Serializable {
        private static final long serialVersionUID = 1L;

        /** Stream tag: the wrapped split follows as a serialized object. */
        private static final int TAG_SERIALIZED_SPLIT = 1;

        /** Stream tag: the wrapped split follows as the individual fields of a FileSplit. */
        private static final int TAG_FILE_SPLIT = 0;

        private transient InputSplit split;
        transient InputFormat<LongWritable, GeometryWritable> format;

        /**
         * @param split
         *            the split to wrap; must be {@link Serializable} or a {@link FileSplit}
         * @param format
         *            the input format that produced {@code split}
         * @throws IllegalArgumentException
         *             if the split is neither serializable nor a file split
         */
        AutoInputSplit(InputSplit split, InputFormat<LongWritable, GeometryWritable> format) {
            if (!(split instanceof Serializable) && !(split instanceof FileSplit)) {
                throw new IllegalArgumentException(
                        "Only serializable splits are supported. (" + format.getClass().getName() + ")");
            }
            this.split = split;
            this.format = format;
        }

        /**
         * @return the input format that produced the wrapped split
         */
        public InputFormat<LongWritable, GeometryWritable> getFormat() {
            return format;
        }

        @Override
        public long getLength() throws IOException, InterruptedException {
            return split.getLength();
        }

        @Override
        public String[] getLocations() throws IOException, InterruptedException {
            return split.getLocations();
        }

        /**
         * @return the wrapped split
         */
        public InputSplit getSplit() {
            return split;
        }

        /**
         * Restores the transient fields in the order {@code writeObject} wrote
         * them: format class, tag, then the split payload.
         */
        private void readObject(ObjectInputStream stream) throws ClassNotFoundException, IOException {
            stream.defaultReadObject();
            Class<?> c = (Class<?>) stream.readObject();
            try {
                // Verify the type before instantiating so that a class which is not an
                // InputFormat never gets its constructor run.
                @SuppressWarnings("unchecked")
                InputFormat<LongWritable, GeometryWritable> restored =
                        (InputFormat<LongWritable, GeometryWritable>) c.asSubclass(InputFormat.class)
                                .getDeclaredConstructor().newInstance();
                format = restored;
            } catch (Exception e) {
                throw new IOException(e);
            }
            int tag = stream.readInt();
            if (tag == TAG_SERIALIZED_SPLIT) {
                split = (InputSplit) stream.readObject();
            } else {
                // Field order mirrors writeObject: path, length, start, hosts.
                Path p = new Path((String) stream.readObject());
                long length = stream.readLong();
                long start = stream.readLong();
                String[] hosts = (String[]) stream.readObject();
                split = new FileSplit(p, start, length, hosts);
            }
        }

        /**
         * Writes the format's class followed by the wrapped split, either as a
         * serialized object or as the individual fields of a {@link FileSplit}.
         */
        private void writeObject(ObjectOutputStream stream) throws IOException {
            stream.defaultWriteObject();
            stream.writeObject(format.getClass());
            if (split instanceof Serializable) {
                stream.writeInt(TAG_SERIALIZED_SPLIT);
                stream.writeObject(split);
            } else {
                // The constructor guarantees a non-serializable split is a FileSplit.
                stream.writeInt(TAG_FILE_SPLIT);
                FileSplit fs = (FileSplit) split;
                stream.writeObject(fs.getPath().toString());
                stream.writeLong(fs.getLength());
                stream.writeLong(fs.getStart());
                stream.writeObject(fs.getLocations());
            }
        }
    }
}