com.ibm.jaql.io.hadoop.Db2InputFormat.java Source code

Introduction

Here is the source code for com.ibm.jaql.io.hadoop.Db2InputFormat.java, a Hadoop InputFormat from the Jaql project that reads rows from DB2 over JDBC and presents each row as a JSON record. The first column of the data query serves as the split key: a separate split query supplies boundary values for that column, and each map task then scans one key range.
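As a quick orientation, here is a minimal configuration sketch showing how a job might wire up this InputFormat through the keys it defines. The driver class, URL, credentials, and queries below are illustrative assumptions, not values taken from the original source.

    JobConf conf = new JobConf();
    conf.setInputFormat(Db2InputFormat.class);
    // any java.sql.Driver works; the DB2 JCC driver is assumed here
    conf.setClass(Db2InputFormat.DRIVER_KEY, com.ibm.db2.jcc.DB2Driver.class, java.sql.Driver.class);
    conf.set(Db2InputFormat.URL_KEY, "jdbc:db2://dbhost:50000/SAMPLE");
    // parsed as a JSON record; each field becomes a JDBC connection property
    conf.set(Db2InputFormat.PROPERTIES_KEY, "{ \"user\": \"db2user\", \"password\": \"secret\" }");
    // the first column of the data query doubles as the split key column
    conf.set(Db2InputFormat.QUERY_KEY, "select id, name, amount from sales");
    // must return the split key values; used to compute range boundaries
    conf.set(Db2InputFormat.SPLIT_QUERY_KEY, "select id from sales");

Each JdbcSplit that getSplits produces is shipped to a map task, which opens its own connection in getRecordReader and scans its key range.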

Source

/*
 * Copyright (C) IBM Corp. 2008.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.ibm.jaql.io.hadoop;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.UndeclaredThrowableException;
import java.sql.Connection;
import java.sql.Driver;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Properties;
import java.util.Map.Entry;

import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

import com.ibm.jaql.json.parser.JsonParser;
import com.ibm.jaql.json.parser.ParseException;
import com.ibm.jaql.json.type.BufferedJsonRecord;
import com.ibm.jaql.json.type.JsonRecord;
import com.ibm.jaql.json.type.JsonString;
import com.ibm.jaql.json.type.JsonValue;
import com.ibm.jaql.json.type.MutableJsonBinary;
import com.ibm.jaql.json.type.MutableJsonDate;
import com.ibm.jaql.json.type.MutableJsonDecimal;
import com.ibm.jaql.json.type.MutableJsonDouble;
import com.ibm.jaql.json.type.MutableJsonLong;
import com.ibm.jaql.json.type.MutableJsonString;

public class Db2InputFormat implements InputFormat<JsonHolder, JsonHolder> {
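    // Configuration keys: the JDBC driver class, the connection URL, a JSON
    // record of connection properties, the data query, and the split query
    // whose values define the key-range boundaries.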
    public static final String DRIVER_KEY = "com.ibm.db2.input.driver";
    public static final String URL_KEY = "com.ibm.db2.input.url";
    public static final String PROPERTIES_KEY = "com.ibm.db2.input.properties"; // a JSON record whose fields become JDBC connection properties
    public static final String QUERY_KEY = "com.ibm.db2.input.query";
    public static final String SPLIT_QUERY_KEY = "com.ibm.db2.input.split.query";
    // public static final String SPLIT_COLUMN_KEY        = "com.ibm.db2.input.split.column";
    // public static final String SPLIT_SAMPLING_RATE_KEY = "com.ibm.db2.input.split.sampling.rate";

    protected Connection conn;

    protected void init(JobConf conf) throws IOException, SQLException {
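        // Instantiate the configured JDBC driver, turn the JSON properties
        // record into java.util.Properties, and open the connection.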
        String url = conf.get(URL_KEY);
        String jsonRec = conf.get(PROPERTIES_KEY);
        Class<? extends Driver> driverClass = conf.getClass(DRIVER_KEY, null, Driver.class);
        if (driverClass == null) {
            throw new RuntimeException("jdbc driver class not found: " + conf.get(DRIVER_KEY));
        }

        Driver driver;
        try {
            driver = driverClass.newInstance();
        } catch (Exception e) {
            throw new UndeclaredThrowableException(e);// IOException("Error constructing jdbc driver", e);
        }

        Properties props = new Properties();

        if (jsonRec != null && !"".equals(jsonRec)) {
            try {
                JsonParser parser = new JsonParser(new StringReader(jsonRec));
                JsonRecord jrec = (JsonRecord) parser.JsonVal();
                for (Entry<JsonString, JsonValue> f : jrec) {
                    JsonString key = f.getKey();
                    JsonValue value = f.getValue();
                    if (value != null) { // Properties is a Hashtable and rejects null values
                        props.setProperty(key.toString(), value.toString());
                    }
                }
            } catch (ParseException pe) {
                throw new UndeclaredThrowableException(pe); // IOException("couldn't parse "+PROPERTIES_KEY+" = "+jsonRec, pe);
            }
        }

        // conn = DriverManager.getConnection(url, props);
        conn = driver.connect(url, props);
    }

    public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
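        // Splitting strategy: run the split query, rank its distinct key
        // values, and pick numSplits - 1 evenly spaced boundary keys; each
        // split then scans one key range of the data query.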
        try {
            String dataQuery = conf.get(QUERY_KEY);
            String splitQuery = conf.get(SPLIT_QUERY_KEY);
            // String col        = conf.get(SPLIT_COLUMN_KEY);
            // String rate       = conf.get(SPLIT_SAMPLING_RATE_KEY, "0.0001");

            init(conf);

            //      String sample = "";
            //      if( ! BigDecimal.ONE.equals(new BigDecimal(rate)) )
            //      {
            //        sample = " tablesample system(100*decimal('"+rate+"'))";
            //      }

            ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
            if (numSplits <= 1) {
                // we only want one split
                splits.add(new JdbcSplit(dataQuery, null, null));
                return splits.toArray(new InputSplit[splits.size()]);
            }

            // Make sure that the data query is executable and get the key column type.
            ResultSetMetaData meta = conn.prepareStatement(dataQuery).getMetaData();
            int dataColCount = meta.getColumnCount();
            // int keyType = meta.getColumnType(1);
            // KeyConverter converter = makeKeyConverter(keyType);

            String query = "with T";
            String sep = "(";
            for (int i = 1; i <= dataColCount; i++) {
                query += sep + meta.getColumnName(i);
                sep = ",";
            }
            query += ") as (" + dataQuery + ") " + " select * from T ";
            String keycol = meta.getColumnName(1);
            dataQuery = query;

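            // S1: split-key values; S2: distinct values; S3: dense rank per value;
            // S4: rank modulo (count / numSplits); S5: every (count/numSplits)-th
            // value, of which the first numSplits - 1 become range boundaries.
            // (Assumes at least numSplits distinct keys, else the divisor is zero.)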
            query = "with S1(c) as (" + splitQuery + "), " + "     S2(c) as (select distinct c from S1), "
                    + "     S3(c,r) as (select c, dense_rank() over (order by c) from S2), "
                    + "     S4(c,r,i) as (select c, r, mod(r,(select count(*) from S2) / " + numSplits
                    + ") from S3), " + "     S5(c) as (select c from S4 where i = 0) "
                    + "select c from S5 order by c fetch first " + (numSplits - 1) + " rows only";

            Statement stmt = conn.createStatement();
            final ResultSet rs = stmt.executeQuery(query);

            if (!rs.next()) {
                splits.add(new JdbcSplit(dataQuery, null, null));
            } else {
                String prevKey = rs.getString(1);
                // prevKey = converter.convert(prevKey);
                // query = dataQuery + " where "+keycol+" <= "+prevKey;
                query = dataQuery + " where " + keycol + " <= ?";
                splits.add(new JdbcSplit(query, null, prevKey));

                while (rs.next()) {
                    String key = rs.getString(1);
                    // key = converter.convert(key);
                    // query = dataQuery + " where "+keycol+" > "+prevKey+" and "+keycol+" <= "+key;
                    query = dataQuery + " where " + keycol + " > ? and " + keycol + " <= ?";
                    splits.add(new JdbcSplit(query, prevKey, key));
                    prevKey = key;
                }

                // query = dataQuery + " where "+keycol+" > "+prevKey;
                query = dataQuery + " where " + keycol + " > ?";
                splits.add(new JdbcSplit(query, prevKey, null));
            }

            rs.close();
            stmt.close();
            conn.close();

            return splits.toArray(new InputSplit[splits.size()]);
        } catch (SQLException e) {
            throw new UndeclaredThrowableException(e); // IOException(e);
        }
    }

    //  private static abstract class KeyConverter
    //  {
    //    abstract String convert(String inKey);
    //  }
    //  
    //  private KeyConverter makeKeyConverter(int columnType) throws SQLException
    //  {
    //    switch( columnType )
    //    {
    //      case Types.BIGINT:
    //      case Types.INTEGER:
    //      case Types.TINYINT:
    //      case Types.SMALLINT:
    //      case Types.DECIMAL: 
    //      case Types.DOUBLE: 
    //      case Types.FLOAT: 
    //        return new KeyConverter() { 
    //          String convert(String inKey) { 
    //            return inKey; 
    //          }
    //        };
    //
    //      case Types.CHAR:
    //      case Types.VARCHAR:
    //        return new KeyConverter() { 
    //          String convert(String inKey) { 
    //            return "'" + inKey.replace("'", "''") + "'"; 
    //          }
    //        };
    //
    //      case Types.DATE:
    //        return new KeyConverter() { 
    //          String convert(String inKey) { 
    //            return "date('"+ inKey +"')";
    //          }
    //        };
    //        
    //      case Types.TIME:
    //        return new KeyConverter() { 
    //          String convert(String inKey) { 
    //            return "time('"+ inKey +"')";
    //          }
    //        };
    //
    //      case Types.TIMESTAMP:
    //        return new KeyConverter() { 
    //          String convert(String inKey) { 
    //            return "timestamp('"+ inKey +"')";
    //          }
    //        };
    //        
    //      case Types.BINARY:
    //        return new KeyConverter() { 
    //          String convert(String inKey) { 
    //            return "x'"+ inKey +"'"; // TODO: right?
    //          }
    //        };
    //
    //      default:
    //        throw new RuntimeException("Unsupported column type: " + columnType);
    //    }
    //  }

    public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf conf, Reporter reporter)
            throws IOException {
        try {
            init(conf);
            return new JdbcRecordReader(conn, (JdbcSplit) split);
        } catch (SQLException e) {
            throw new UndeclaredThrowableException(e); //IOException(e);
        }
    }

    public static class JdbcSplit implements InputSplit {
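        // A serializable range query: lowKey and highKey (either may be null)
        // are bound to the query's '?' parameter markers by JdbcRecordReader.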
        protected String query;
        protected String lowKey;
        protected String highKey;

        /** Call readFields to initialize */
        public JdbcSplit() {
        }

        public JdbcSplit(String query, String lowKey, String highKey) {
            this.query = query;
            this.lowKey = lowKey;
            this.highKey = highKey;
        }

        public long getLength() throws IOException {
            return 1000000; // split size is unknown; return a fixed estimate
        }

        public String[] getLocations() throws IOException {
            return new String[0]; // no locality information; TODO: DPF affinity
        }

        public void readFields(DataInput in) throws IOException {
            query = in.readUTF();
            if (in.readByte() == 1) {
                lowKey = in.readUTF();
            } else {
                lowKey = null;
            }
            if (in.readByte() == 1) {
                highKey = in.readUTF();
            } else {
                highKey = null;
            }
        }

        public void write(DataOutput out) throws IOException {
            out.writeUTF(query);
            if (lowKey != null) {
                out.writeByte(1);
                out.writeUTF(lowKey);
            } else {
                out.writeByte(0);
            }
            if (highKey != null) {
                out.writeByte(1);
                out.writeUTF(highKey);
            } else {
                out.writeByte(0);
            }
        }
    }

    protected static class JdbcRecordReader implements RecordReader<JsonHolder, JsonHolder> {
        protected final Connection conn;
        protected final PreparedStatement stmt;
        protected final ResultSet resultSet;
        protected final ResultSetMetaData meta;
        protected final int ncols;
        protected final JsonString[] names;
        protected final JsonValue[] values;
        protected long numRecs;

        public JdbcRecordReader(Connection conn, JdbcSplit split) throws SQLException {
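            // Bind the split's key bounds to the parameter markers, execute the
            // query, and pre-allocate one mutable JSON value per column based on
            // its declared SQL type.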
            this.conn = conn;
            stmt = conn.prepareStatement(split.query);
            if (split.lowKey != null) {
                stmt.setObject(1, split.lowKey);
                if (split.highKey != null) {
                    stmt.setObject(2, split.highKey);
                }
            } else if (split.highKey != null) {
                stmt.setObject(1, split.highKey);
            }
            resultSet = stmt.executeQuery();
            meta = resultSet.getMetaData();
            ncols = meta.getColumnCount();
            names = new JsonString[ncols];
            values = new JsonValue[ncols];

            for (int i = 0; i < ncols; i++) {
                names[i] = new JsonString(meta.getColumnName(i + 1).toLowerCase());

                switch (meta.getColumnType(i + 1)) {
                case Types.BIGINT:
                case Types.INTEGER:
                case Types.TINYINT:
                case Types.SMALLINT:
                    values[i] = new MutableJsonLong();
                    break;
                case Types.DOUBLE:
                case Types.FLOAT:
                    values[i] = new MutableJsonDouble();
                    break;
                case Types.DECIMAL:
                    values[i] = new MutableJsonDecimal();
                    break;
                case Types.CHAR:
                case Types.VARCHAR:
                case Types.OTHER: // TODO: Types.XML, when jdbc gets there...
                    values[i] = new MutableJsonString();
                    break;
                case Types.DATE:
                case Types.TIME:
                case Types.TIMESTAMP:
                    values[i] = new MutableJsonDate();
                    break;
                case Types.BINARY:
                    values[i] = new MutableJsonBinary();
                    break;
                default:
                    throw new RuntimeException("Unsupported column type: " + meta.getColumnTypeName(i + 1));
                }
            }
        }

        public void close() throws IOException {
            try {
                resultSet.close();
                stmt.close();
                conn.close();
            } catch (SQLException e) {
                throw new UndeclaredThrowableException(e); // IOException(e);
            }
        }

        public JsonHolder createKey() {
            return null;
        }

        /**
         * Warning: for efficiency, this reader supports only one created value at a
         * time (no concurrent readers).
         */
        public JsonHolder createValue() {
            return new JsonHolder(new BufferedJsonRecord(ncols));
        }

        public long getPos() throws IOException {
            return numRecs;
        }

        public float getProgress() throws IOException {
            return (float) (numRecs / 1000000.0); // crude estimate against the fixed getLength()
        }

        public boolean next(JsonHolder key, JsonHolder value) throws IOException {
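            // Copy each non-null column of the current row into its pre-allocated
            // JSON value; NULL columns are omitted from the record entirely.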
            try {
                if (!resultSet.next()) {
                    return false;
                }

                if (value.value == null) {
                    // the caller didn't create the value via createValue()
                    value.value = createValue().value;
                }
                BufferedJsonRecord jrec = (BufferedJsonRecord) value.value;
                jrec.clear();

                for (int i = 0; i < ncols; i++) {
                    switch (meta.getColumnType(i + 1)) {
                    case Types.BIGINT:
                    case Types.INTEGER:
                    case Types.TINYINT:
                    case Types.SMALLINT:
                        ((MutableJsonLong) values[i]).set(resultSet.getLong(i + 1));
                        break;
                    case Types.DECIMAL:
                        ((MutableJsonDecimal) values[i]).set(resultSet.getBigDecimal(i + 1));
                        break;
                    case Types.DOUBLE:
                    case Types.FLOAT:
                        // these columns hold a MutableJsonDouble (see the constructor)
                        ((MutableJsonDouble) values[i]).set(resultSet.getDouble(i + 1));
                        break;
                    case Types.CHAR:
                    case Types.VARCHAR:
                        String s = resultSet.getString(i + 1);
                        if (s != null) {
                            ((MutableJsonString) values[i]).setCopy(s);
                        }
                        break;
                    case Types.DATE:
                        java.sql.Date date = resultSet.getDate(i + 1);
                        if (date != null) { // guard against NULL before calling getTime()
                            ((MutableJsonDate) values[i]).set(date.getTime());
                        }
                        break;
                    case Types.TIME:
                        java.sql.Time time = resultSet.getTime(i + 1);
                        if (time != null) {
                            ((MutableJsonDate) values[i]).set(time.getTime());
                        }
                        break;
                    case Types.TIMESTAMP:
                        java.sql.Timestamp ts = resultSet.getTimestamp(i + 1);
                        if (ts != null) {
                            ((MutableJsonDate) values[i]).set(ts.getTime());
                        }
                        break;
                    case Types.BINARY:
                        byte[] bytes = resultSet.getBytes(i + 1);
                        if (bytes != null) {
                            ((MutableJsonBinary) values[i]).set(bytes);
                        }
                        break;
                    default:
                        throw new RuntimeException("Unsupported column type: " + meta.getColumnTypeName(i + 1));
                    } // end switch

                    if (!resultSet.wasNull()) {
                        jrec.add(names[i], values[i]);
                    }
                }

                return true;
            } catch (SQLException e) {
                throw new UndeclaredThrowableException(e); // IOException(e);
            }
        }
    }

    @Deprecated
    public void validateInput(JobConf conf) throws IOException {
    }
}