com.ibm.jaql.io.hadoop.Db2DpfTableInputFormat.java Source code

Introduction

Here is the source code for com.ibm.jaql.io.hadoop.Db2DpfTableInputFormat.java
Source

/*
 * Copyright (C) IBM Corp. 2009.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.ibm.jaql.io.hadoop;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Properties;

import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

/**
 * Input format that reads a hash-partitioned DB2 table with one input split per
 * database partition.
 * <p>
 * {@link #getSplits(JobConf, int)} looks up the table's first partitioning-key
 * column and the list of database partition numbers in the DB2 catalog, and
 * produces one {@link DpfSplit} per partition. Every split carries the same
 * query text; the query restricts itself to rows whose partitioning key maps to
 * {@code CURRENT DBPARTITIONNUM}, so the rows returned depend on which
 * partition the reader is connected to.
 * {@link #getRecordReader(InputSplit, JobConf, Reporter)} therefore requests a
 * connection to the split's partition (via the {@code connectNode} connection
 * property) and verifies that it got it before running the query.
 * <p>
 * The {@code conn} field and the {@code init} methods used here are inherited
 * from {@link AbstractDb2InputFormat}.
 */
public class Db2DpfTableInputFormat extends AbstractDb2InputFormat {
    /** Configuration key: schema of the table to read (required). */
    public static final String SCHEMA_KEY = "com.ibm.db2.input.schema";
    /** Configuration key: name of the table to read (required). */
    public static final String TABLE_KEY = "com.ibm.db2.input.table";
    /** Configuration key: select-list SQL fragment; defaults to {@code *}. */
    public static final String COLUMNS_KEY = "com.ibm.db2.input.columns";
    /** Configuration key: optional SQL predicate ANDed onto the generated query. */
    public static final String WHERE_KEY = "com.ibm.db2.input.where";

    /**
     * Creates one split per database partition of the configured table.
     *
     * @param conf      job configuration providing the connection settings and
     *                  the {@code *_KEY} options of this class
     * @param numSplits ignored; the number of splits always equals the number
     *                  of partitions reported by the catalog
     * @return one {@link DpfSplit} per database partition
     * @throws IOException if the schema or table is not configured, the table
     *                     has no partitioning key, or no partitions are found
     * @throws UndeclaredThrowableException wrapping any {@link SQLException}
     */
    public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
        // Only close the connection if this call actually (re)initialized it.
        boolean initialized = false;
        try {
            init(conf);
            initialized = true;

            String schema = conf.get(SCHEMA_KEY);
            String table = conf.get(TABLE_KEY);
            if (schema == null || table == null) {
                throw new IOException("both " + SCHEMA_KEY + " and " + TABLE_KEY + " must be set");
            }
            // These two are SQL fragments by design and are inserted verbatim.
            String columns = conf.get(COLUMNS_KEY, "*");
            String where = conf.get(WHERE_KEY, "");

            String schemaTable = quoteIdentifier(schema) + "." + quoteIdentifier(table);
            String keyCol = findPartitioningKeyColumn(schema, table, schemaTable);

            // Restrict each reader to the rows stored on the partition it is connected to.
            String query = "select " + columns + " from " + schemaTable + " where SYSIBM.DBPARTITIONNUM("
                    + quoteIdentifier(keyCol) + ") = CURRENT DBPARTITIONNUM\n";
            if (!"".equals(where)) {
                query += " and (" + where + ")\n";
            }

            ArrayList<InputSplit> splits = buildSplits(schema, table, schemaTable, query);
            return splits.toArray(new InputSplit[splits.size()]);
        } catch (SQLException e) {
            throw new UndeclaredThrowableException(e); // IOException(e);
        } finally {
            // The connection is only needed for the catalog lookups; release it
            // on every exit path, including failures.
            if (initialized) {
                closeConnectionIgnoringErrors();
            }
        }
    }

    /**
     * Opens a connection to the split's database partition and returns a reader
     * over the split's query.
     *
     * @param split    a {@link DpfSplit} produced by {@link #getSplits(JobConf, int)}
     * @param conf     job configuration providing the connection settings
     * @param reporter not used by this method
     * @return a reader that takes ownership of the opened connection and statement
     * @throws IOException if the current partition cannot be determined or is
     *                     not the one the split asked for
     * @throws UndeclaredThrowableException wrapping any {@link SQLException}
     */
    public RecordReader<JsonHolder, JsonHolder> getRecordReader(InputSplit split, JobConf conf, Reporter reporter)
            throws IOException {
        boolean initialized = false;
        boolean handedOff = false;
        try {
            DpfSplit dpfSplit = (DpfSplit) split;
            Properties props = new Properties();
            // Ask for a connection to the specific partition this split covers.
            props.setProperty("connectNode", Integer.toString(dpfSplit.partitionId));
            init(conf, props);
            initialized = true;

            verifyConnectedPartition(dpfSplit.partitionId);

            PreparedStatement stmt = conn.prepareStatement(dpfSplit.query);
            RecordReader<JsonHolder, JsonHolder> reader = new JdbcRecordReader(conn, stmt);
            handedOff = true; // from here on the reader is responsible for closing
            return reader;
        } catch (SQLException e) {
            throw new UndeclaredThrowableException(e); //IOException(e);
        } finally {
            // On failure nobody else will ever close the freshly opened connection.
            if (initialized && !handedOff) {
                closeConnectionIgnoringErrors();
            }
        }
    }

    /**
     * Returns the name of the column with {@code PARTKEYSEQ = 1} for the given
     * table, as recorded in {@code syscat.columns}.
     */
    private String findPartitioningKeyColumn(String schema, String table, String schemaTable)
            throws SQLException, IOException {
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            stmt = conn.prepareStatement(
                    "select COLNAME from syscat.columns where TABSCHEMA = ? and TABNAME = ? and PARTKEYSEQ = 1");
            stmt.setString(1, schema);
            stmt.setString(2, table);
            rs = stmt.executeQuery();
            if (!rs.next()) {
                // TODO: we could revert to primary key partitioning instead of raising an error
                throw new IOException("partitioning key not found for " + schemaTable);
            }
            return rs.getString(1);
        } finally {
            closeIgnoringErrors(rs);
            closeIgnoringErrors(stmt);
        }
    }

    /**
     * Queries the catalog for the database partition numbers of the given table
     * and wraps each one, together with {@code query}, in a {@link DpfSplit}.
     */
    private ArrayList<InputSplit> buildSplits(String schema, String table, String schemaTable, String query)
            throws SQLException, IOException {
        String partQuery = " select p.dbpartitionnum "
                + " from syscat.tables t, syscat.tablespaces ts, syscat.dbpartitiongroupdef p "
                + " where t.tbspaceid = ts.tbspaceid and ts.dbpgname = p.dbpgname "
                + "   and t.partition_mode = 'H' and TABSCHEMA = ? and TABNAME = ?";

        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            stmt = conn.prepareStatement(partQuery);
            stmt.setString(1, schema);
            stmt.setString(2, table);
            rs = stmt.executeQuery();

            ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
            while (rs.next()) {
                splits.add(new DpfSplit(query, rs.getInt(1)));
            }
            if (splits.isEmpty()) {
                throw new IOException("no partitions found for table " + schemaTable);
            }
            return splits;
        } finally {
            closeIgnoringErrors(rs);
            closeIgnoringErrors(stmt);
        }
    }

    /**
     * Checks that the current connection is attached to the expected database
     * partition; the split's query only returns the intended rows if it is.
     */
    private void verifyConnectedPartition(int expected) throws SQLException, IOException {
        Statement s = null;
        ResultSet rs = null;
        try {
            s = conn.createStatement();
            rs = s.executeQuery("values current dbpartitionnum");
            if (!rs.next()) {
                throw new IOException("couldn't get dbpartitionnum");
            }
            int actual = rs.getInt(1);
            if (actual != expected) {
                throw new IOException("didn't connect to the right dbpartitionnum.  Expected "
                        + expected + " got " + actual);
            }
        } finally {
            closeIgnoringErrors(rs);
            closeIgnoringErrors(s);
        }
    }

    /** Wraps a name in double quotes, doubling any embedded double quote. */
    private static String quoteIdentifier(String name) {
        return "\"" + name.replace("\"", "\"\"") + "\"";
    }

    /** Best-effort close; a failure here must not mask the primary outcome. */
    private static void closeIgnoringErrors(ResultSet rs) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException ignored) {
                // cleanup only; nothing useful can be done with this failure
            }
        }
    }

    /** Best-effort close; a failure here must not mask the primary outcome. */
    private static void closeIgnoringErrors(Statement stmt) {
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException ignored) {
                // cleanup only; nothing useful can be done with this failure
            }
        }
    }

    /** Best-effort close of the inherited connection, if one is set. */
    private void closeConnectionIgnoringErrors() {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException ignored) {
                // cleanup only; nothing useful can be done with this failure
            }
        }
    }

    /**
     * Split describing one database partition: the query to run and the number
     * of the partition to run it on.
     */
    protected static class DpfSplit implements InputSplit {
        protected String query;
        protected int partitionId;

        /**
         * Creates an empty split to be populated by {@link #readFields(DataInput)}.
         * NOTE(review): Hadoop is expected to instantiate splits reflectively
         * through a no-argument constructor before deserializing them.
         */
        public DpfSplit() {
        }

        /**
         * @param query       SQL text to execute for this split
         * @param partitionId database partition number to connect to
         */
        public DpfSplit(String query, int partitionId) {
            this.query = query;
            this.partitionId = partitionId;
        }

        /** Returns a fixed placeholder size, identical for every split. */
        public long getLength() throws IOException {
            return 1000000; // TODO: get partition size from catalog
        }

        /** Returns no preferred hosts; an empty array rather than {@code null}. */
        public String[] getLocations() throws IOException {
            return new String[0]; // TODO: DPF affinity
        }

        /** Reads the fields in the order written by {@link #write(DataOutput)}. */
        public void readFields(DataInput in) throws IOException {
            query = in.readUTF();
            partitionId = in.readInt();
        }

        /**
         * Writes the query followed by the partition number. Note that
         * {@code writeUTF} cannot encode strings longer than 65535 bytes.
         */
        public void write(DataOutput out) throws IOException {
            out.writeUTF(query);
            out.writeInt(partitionId);
        }
    }

    /** Intentionally performs no validation. */
    @Deprecated
    public void validateInput(JobConf conf) throws IOException {
    }
}