Java tutorial: reading HAWQ tables with HAWQInputFormat
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.pivotal.hawq.mapreduce;

import com.pivotal.hawq.mapreduce.ao.HAWQAOInputFormat;
import com.pivotal.hawq.mapreduce.metadata.HAWQTableFormat;
import com.pivotal.hawq.mapreduce.metadata.MetadataAccessor;
import com.pivotal.hawq.mapreduce.parquet.HAWQParquetInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;
import java.util.List;

/**
 * An InputFormat that reads tuples from a HAWQ table as
 * {@link com.pivotal.hawq.mapreduce.HAWQRecord}.
 * Currently AO and Parquet tables are supported.
 */
public class HAWQInputFormat extends FileInputFormat<Void, HAWQRecord> {

    private static final String TABLE_FORMAT = "mapreduce.hawq.table.format";

    private HAWQAOInputFormat aoInputFormat = new HAWQAOInputFormat();
    private HAWQParquetInputFormat parquetInputFormat = new HAWQParquetInputFormat();

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        HAWQTableFormat tableFormat = getTableFormat(job.getConfiguration());

        switch (tableFormat) {
            case AO:
                return aoInputFormat.getSplits(job);
            case Parquet:
                return parquetInputFormat.getSplits(job);
            default:
                throw new AssertionError("invalid table format: " + tableFormat);
        }
    }

    @Override
    public RecordReader<Void, HAWQRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        HAWQTableFormat tableFormat = getTableFormat(context.getConfiguration());

        switch (tableFormat) {
            case AO:
                return aoInputFormat.createRecordReader(split, context);
            case Parquet:
                return parquetInputFormat.createRecordReader(split, context);
            default:
                throw new AssertionError("invalid table format: " + tableFormat);
        }
    }

    private HAWQTableFormat getTableFormat(Configuration conf) {
        String formatName = conf.get(TABLE_FORMAT);
        if (formatName == null) {
            throw new IllegalStateException("Please call HAWQInputFormat.setInput first");
        }
        return HAWQTableFormat.valueOf(formatName);
    }

    /**
     * Initializes the map part of the job with the appropriate input settings
     * by connecting to the database.
     *
     * @param conf      The map-reduce job configuration
     * @param db_url    The database URL to connect to
     * @param username  The username for setting up a connection to the database
     * @param password  The password for setting up a connection to the database
     * @param tableName The name of the table to access
     */
    public static void setInput(Configuration conf, String db_url,
                                String username, String password, String tableName) {
        MetadataAccessor accessor = MetadataAccessor.newInstanceUsingJDBC(
                db_url, username, password, tableName);
        setInput(conf, accessor);
    }

    /**
     * Initializes the map part of the job with the appropriate input settings
     * by reading a metadata file stored in the local filesystem.
     * <p/>
     * To get the metadata file, use gpextract first.
     *
     * @param conf         The map-reduce job configuration
     * @param metadataFile Path to the metadata file generated by gpextract
     */
    public static void setInput(Configuration conf, String metadataFile) {
        MetadataAccessor accessor = MetadataAccessor.newInstanceUsingFile(metadataFile);
        setInput(conf, accessor);
    }

    private static void setInput(Configuration conf, MetadataAccessor accessor) {
        HAWQTableFormat tableFormat = accessor.getTableFormat();

        switch (tableFormat) {
            case AO:
                HAWQAOInputFormat.setInput(conf, accessor.getAOMetadata());
                break;
            case Parquet:
                HAWQParquetInputFormat.setInput(conf, accessor.getParquetMetadata());
                break;
            default:
                throw new AssertionError("invalid table format: " + tableFormat);
        }

        conf.set(TABLE_FORMAT, tableFormat.name());
    }
}
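Below is a minimal driver sketch showing how a MapReduce job might use this class: it calls HAWQInputFormat.setInput to load the table metadata over JDBC, registers HAWQInputFormat as the job's input format, and counts the rows it receives. The class name HAWQRowCountJob, the RowCountMapper, and the command-line argument layout are illustrative assumptions for this tutorial, not part of the HAWQ API; only HAWQInputFormat.setInput, HAWQRecord, and the standard Hadoop Job calls come from the code above.

import com.pivotal.hawq.mapreduce.HAWQInputFormat;
import com.pivotal.hawq.mapreduce.HAWQRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

import java.io.IOException;

public class HAWQRowCountJob {

    /** Emits a count of 1 for every HAWQRecord delivered by HAWQInputFormat. */
    public static class RowCountMapper
            extends Mapper<Void, HAWQRecord, Text, LongWritable> {

        private static final Text ROWS = new Text("rows");
        private static final LongWritable ONE = new LongWritable(1);

        @Override
        protected void map(Void key, HAWQRecord value, Context context)
                throws IOException, InterruptedException {
            // The record's fields are not inspected here; we only count tuples.
            context.write(ROWS, ONE);
        }
    }

    public static void main(String[] args) throws Exception {
        // Hypothetical argument layout for this sketch:
        // <db_url> <username> <password> <table_name> <output_path>
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "hawq-row-count");
        job.setJarByClass(HAWQRowCountJob.class);

        // Read splits and records through HAWQInputFormat; setInput must be
        // called on the same Configuration the job uses, before submission.
        job.setInputFormatClass(HAWQInputFormat.class);
        HAWQInputFormat.setInput(job.getConfiguration(),
                args[0], args[1], args[2], args[3]);

        job.setMapperClass(RowCountMapper.class);
        job.setCombinerClass(LongSumReducer.class);
        job.setReducerClass(LongSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(args[4]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

To read from a metadata file produced by gpextract instead of connecting over JDBC, replace the setInput call with the single-argument overload, HAWQInputFormat.setInput(job.getConfiguration(), metadataFilePath).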