Java tutorial

This tutorial walks through MrGeo's AutoLoadFunc, a custom Apache Pig LoadFunc. It reads vector features through AutoFeatureInputFormat, turns each feature's attributes into a Pig tuple, and implements LoadMetadata so Pig can discover a schema by sampling the first record of the input.
/*
 * Copyright 2009-2015 DigitalGlobe, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.mrgeo.pig;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.pig.*;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.mrgeo.format.AutoFeatureInputFormat;
import org.mrgeo.geometry.Geometry;
import org.mrgeo.utils.HadoopUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class AutoLoadFunc extends LoadFunc implements LoadMetadata {
  @SuppressWarnings("rawtypes")
  private RecordReader _reader;
  private TupleFactory _tupleFactory = TupleFactory.getInstance();

  @Override
  public InputFormat getInputFormat() throws IOException {
    return new AutoFeatureInputFormat();
  }

  @Override
  public Tuple getNext() throws IOException {
    Tuple result = null;
    Geometry f;
    boolean gotFeature;
    try {
      gotFeature = _reader.nextKeyValue();
      f = (Geometry) _reader.getCurrentValue();
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
    if (gotFeature) {
      // Attribute values become tuple fields. Index 0 is left null as a
      // reserved placeholder, so attribute values start at field 1.
      Map<String, String> attrs = f.getAllAttributesSorted();
      ArrayList<Object> entries = new ArrayList<Object>(attrs.size() + 1);
      entries.add(null); // skip 0
      for (String attr : attrs.values()) {
        entries.add(attr);
      }
      result = _tupleFactory.newTupleNoCopy(entries);
    }
    return result;
  }

  @Override
  public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    _reader = reader;
  }

  @Override
  public void setLocation(String location, Job job) throws IOException {
    FileInputFormat.setInputPaths(job, location);
  }

  @Override
  public String[] getPartitionKeys(String arg0, Job arg1) throws IOException {
    // not needed.
    return null;
  }

  @Override
  public ResourceSchema getSchema(String path, Job job) throws IOException {
    try {
      ResourceSchema result = null;
      AutoFeatureInputFormat input = new AutoFeatureInputFormat();
      job.getConfiguration().set("mapred.input.dir", path);
      List<InputSplit> splits = input.getSplits(job);
      RecordReader<LongWritable, Geometry> reader = input.createRecordReader(splits.get(0),
          HadoopUtils.createTaskAttemptContext(job.getConfiguration(), new TaskAttemptID()));
      // Sniff the schema from the first record of the first split.
      if (reader.nextKeyValue()) {
        result = new ResourceSchema();
        Geometry f = reader.getCurrentValue();
        Map<String, String> attrs = f.getAllAttributesSorted();
        ResourceFieldSchema[] fields = new ResourceFieldSchema[attrs.size()];
        int ndx = 0;
        for (Map.Entry<String, String> attr : attrs.entrySet()) {
          fields[ndx] = new ResourceFieldSchema();
          fields[ndx].setName(attr.getKey());
          String val = attr.getValue();
          // Try the narrowest type first. Every int also parses as a double,
          // so Integer must be tested before Double; the original tried
          // Double first, which left the INTEGER branch unreachable.
          try {
            Integer.parseInt(val);
            fields[ndx].setType(DataType.INTEGER);
          } catch (NumberFormatException e) {
            try {
              Double.parseDouble(val);
              fields[ndx].setType(DataType.DOUBLE);
            } catch (NumberFormatException e2) {
              fields[ndx].setType(DataType.CHARARRAY);
            }
          }
          ndx++;
        }
        result.setFields(fields);
      }
      return result;
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }

  @Override
  public ResourceStatistics getStatistics(String arg0, Job arg1) throws IOException {
    // not needed.
    return null;
  }

  @Override
  public void setPartitionFilter(Expression arg0) throws IOException {
    // not needed.
  }
}
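The one subtle point in getSchema() is the order of the parse attempts: any string that parses as an int also parses as a double, so Integer has to be tried before Double or the INTEGER branch can never fire. A minimal, self-contained sketch of that rule (the class and method names here are ours for illustration, not part of MrGeo):

import org.apache.pig.data.DataType;

public class TypeSniffDemo {
  // Mirrors the inference rule in getSchema(): the narrowest parse wins.
  static byte sniffType(String val) {
    try {
      Integer.parseInt(val);
      return DataType.INTEGER;
    } catch (NumberFormatException e) {
      try {
        Double.parseDouble(val);
        return DataType.DOUBLE;
      } catch (NumberFormatException e2) {
        return DataType.CHARARRAY;
      }
    }
  }

  public static void main(String[] args) {
    // "42" parses as a double too, which is why Integer must be tried first.
    System.out.println(sniffType("42") == DataType.INTEGER);    // true
    System.out.println(sniffType("3.14") == DataType.DOUBLE);   // true
    System.out.println(sniffType("road") == DataType.CHARARRAY); // true
  }
}

Note also that the schema is sniffed from a single record, so a first row with an unusual value (say, a numeric-looking string in an otherwise textual column) will skew the inferred types for the whole load.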
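To exercise the loader end to end, you can drive Pig from Java with PigServer. A minimal sketch, assuming a built MrGeo jar and an input directory that AutoFeatureInputFormat can read (both paths below are placeholders, not real artifact names):

import java.util.Iterator;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;

public class AutoLoadFuncExample {
  public static void main(String[] args) throws Exception {
    // Run Pig in local mode; everything stays in-process.
    PigServer pig = new PigServer(ExecType.LOCAL);

    // Hypothetical jar and input paths -- substitute your own build
    // artifact and a feature file the input format understands.
    pig.registerJar("mrgeo.jar");
    pig.registerQuery("features = LOAD 'input/features' "
        + "USING org.mrgeo.pig.AutoLoadFunc();");

    // Pull tuples back to the client. Field 0 is the reserved null
    // placeholder; attribute values start at field 1, per getNext() above.
    Iterator<Tuple> it = pig.openIterator("features");
    while (it.hasNext()) {
      System.out.println(it.next());
    }
  }
}

In MAPREDUCE mode the same script would be compiled into Hadoop jobs, with setLocation() called during planning and prepareToRead() called in each map task.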