com.ibm.jaql.io.hadoop.ArrayInputFormat.java Source code

Introduction

Here is the source code for com.ibm.jaql.io.hadoop.ArrayInputFormat.java: a Hadoop InputFormat that reads a JsonArray from the job configuration and turns each element of that array into its own input split. A short usage sketch follows the listing.

Source

/*
 * Copyright (C) IBM Corp. 2008.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.ibm.jaql.io.hadoop;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;

import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

import com.ibm.jaql.io.AdapterStore;
import com.ibm.jaql.io.serialization.binary.BinaryFullSerializer;
import com.ibm.jaql.io.serialization.binary.def.DefaultBinaryFullSerializer;
import com.ibm.jaql.json.type.JsonArray;
import com.ibm.jaql.json.type.JsonRecord;
import com.ibm.jaql.json.type.JsonString;
import com.ibm.jaql.json.type.JsonUtil;
import com.ibm.jaql.json.type.JsonValue;
import com.ibm.jaql.util.BaseUtil;

/** Input format that reads a {@link JsonArray} from Hadoop's job configuration and creates
 * a single split for each element of that array. */
public class ArrayInputFormat implements InputFormat<JsonHolder, JsonHolder> {

    /** Name of the option-record field that holds the input array. */
    public static final String ARRAY_NAME = "array";

    /** Key under which the array is stored in the job configuration. */
    public static final String JOB_ARRAY_NAME = ArrayInputFormat.class.getCanonicalName() + ".array";

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit,
     *      org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
     */
    public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
            throws IOException {
        return new ArrayRecordReader(((ArrayInputSplit) split).getValue());
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf,
     *      int)
     */
    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
        // get the JsonArray of values from the job configuration
        JsonArray arr = null;
        try {
            arr = ConfUtil.readConfArray(job, JOB_ARRAY_NAME);
        } catch (Exception e) {
            BaseUtil.LOG.warn("array read failed", e);
            throw new RuntimeException(e);
        }
        // make a split for each one
        InputSplit[] splits = null;
        try {
            splits = new InputSplit[(int) arr.count()]; // FIXME: should not require a cast here...
            for (int i = 0; i < splits.length; i++) {
                splits[i] = new ArrayInputSplit(arr.get(i));
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        return splits;
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.InputFormat#validateInput(org.apache.hadoop.mapred.JobConf)
     */
    public void validateInput(JobConf job) throws IOException {
        // verify that an array exists
        if (job.get(JOB_ARRAY_NAME) == null)
            throw new IOException("variable name: " + JOB_ARRAY_NAME + " not set");
    }
}

/**
 * Record reader that returns the single value carried by an {@link ArrayInputSplit}
 * exactly once.
 */
class ArrayRecordReader implements RecordReader<JsonHolder, JsonHolder> {

    private JsonValue splitValue; // the single value carried by this reader's split

    private boolean seen = false; // set once next() has returned the value

    /**
     * @param value the single value that this reader will return
     */
    public ArrayRecordReader(JsonValue value) {
        this.splitValue = value;
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.RecordReader#close()
     */
    public void close() throws IOException {
        // nothing
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.RecordReader#createKey()
     */
    public JsonHolder createKey() {
        return null; // this reader produces no keys
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.RecordReader#createValue()
     */
    public JsonHolder createValue() {
        return new JsonHolder();
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.RecordReader#getPos()
     */
    public long getPos() throws IOException {
        return 0; // a single in-memory value has no meaningful position
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.RecordReader#getProgress()
     */
    public float getProgress() throws IOException {
        return 0; // progress is trivial for a single-record split
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object,
     *      java.lang.Object)
     */
    public boolean next(JsonHolder key, JsonHolder value) throws IOException {
        if (!seen) {
            try {
                value.value = JsonUtil.getCopy(splitValue, value.value);
                seen = true;
                return true;
            } catch (IOException e) {
                throw e;
            } catch (Exception e) {
                throw new UndeclaredThrowableException(e);
            }
        }
        return false;
    }
}

/**
 * Input split that carries a single {@link JsonValue}, serialized with the
 * default binary serializer.
 */
class ArrayInputSplit implements InputSplit {
    private BinaryFullSerializer serializer = DefaultBinaryFullSerializer.getInstance();
    private JsonValue value;

    /**
     * Default constructor, used by Hadoop to instantiate the split before
     * {@link #readFields(DataInput)} fills it in.
     */
    public ArrayInputSplit() {
        value = null;
    }

    /**
     * @param v the value to be carried by this split
     */
    public ArrayInputSplit(JsonValue v) {
        value = v;
    }

    /**
     * @return the value carried by this split
     */
    public JsonValue getValue() {
        return value;
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.InputSplit#getLength()
     */
    public long getLength() throws IOException {
        return 0; // the value is carried in the split itself, not in a file
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.mapred.InputSplit#getLocations()
     */
    public String[] getLocations() throws IOException {
        return new String[0]; // FIXME: use a shared static empty array
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    public void readFields(DataInput in) throws IOException {
        value = serializer.read(in, value);
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    public void write(DataOutput out) throws IOException {
        serializer.write(out, value);
    }

}

/**
 * Configurator that copies the "array" option of an input descriptor into the
 * job configuration, where {@link ArrayInputFormat} finds it.
 */
class ArrayInputConfigurator implements InitializableConfSetter {

    protected JsonRecord options; // options record taken from the input descriptor

    /*
     * (non-Javadoc)
     * 
     * @see com.ibm.jaql.io.hadoop.ConfSetter#init(java.lang.Object)
     */
    public void init(JsonValue data) throws Exception {
        options = AdapterStore.getStore().input.getOption((JsonRecord) data);
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.ibm.jaql.io.hadoop.ConfSetter#setSequential(org.apache.hadoop.mapred.JobConf)
     */
    public void setSequential(JobConf conf) throws Exception {
        set(conf);
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.ibm.jaql.io.hadoop.ConfSetter#setParallel(org.apache.hadoop.mapred.JobConf)
     */
    public void setParallel(JobConf conf) throws Exception {
        set(conf);
    }

    /**
     * Writes the configured array into the job configuration under
     * {@link ArrayInputFormat#JOB_ARRAY_NAME}.
     * 
     * @param conf job configuration to update
     * @throws Exception if the array cannot be written
     */
    protected void set(JobConf conf) throws Exception {
        JsonArray data = (JsonArray) options.get(new JsonString(ArrayInputFormat.ARRAY_NAME));
        ConfUtil.writeConfArray(conf, ArrayInputFormat.JOB_ARRAY_NAME, data);
    }

}
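
Example Usage

To see the format end to end, here is a hypothetical demo driver; it is not part of jaql and nothing in it ships with the library. It stores a small array in a JobConf, asks ArrayInputFormat for splits, reads each split's single record, and exercises the Writable round-trip that Hadoop performs when shipping a split to a task. BufferedJsonArray and its add method are assumptions about this jaql version's JSON API; ConfUtil.writeConfArray, JsonHolder, and the package-private split and reader classes are taken from the listing above, which is why the sketch sits in the same package.

package com.ibm.jaql.io.hadoop;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

import com.ibm.jaql.json.type.BufferedJsonArray;
import com.ibm.jaql.json.type.JsonString;

/** Hypothetical demo driver (not part of jaql). */
public class ArrayInputFormatDemo {
    public static void main(String[] args) throws Exception {
        // Assumption: BufferedJsonArray is a concrete, growable JsonArray in
        // this version of jaql; substitute your JsonArray implementation.
        BufferedJsonArray array = new BufferedJsonArray();
        array.add(new JsonString("first"));
        array.add(new JsonString("second"));

        // Store the array in the job configuration, mirroring what
        // ArrayInputConfigurator.set() does for a real job.
        JobConf conf = new JobConf();
        conf.setInputFormat(ArrayInputFormat.class);
        ConfUtil.writeConfArray(conf, ArrayInputFormat.JOB_ARRAY_NAME, array);

        // One split per array element; the split-count hint (0) is ignored.
        ArrayInputFormat format = new ArrayInputFormat();
        InputSplit[] splits = format.getSplits(conf, 0);

        for (InputSplit split : splits) {
            RecordReader<JsonHolder, JsonHolder> reader =
                    format.getRecordReader(split, conf, Reporter.NULL);
            JsonHolder key = reader.createKey(); // null: the reader has no keys
            JsonHolder value = reader.createValue();
            while (reader.next(key, value)) { // yields exactly one record
                System.out.println(value.value);
            }
            reader.close();
        }

        // The Writable round-trip Hadoop performs when shipping a split to a task.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        splits[0].write(new DataOutputStream(bytes));
        ArrayInputSplit copy = new ArrayInputSplit();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.getValue()); // the first element again
    }
}

With two elements in the array, the driver prints both values; in a real job each map task would instead receive exactly one of them as its split.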