// Java tutorial
package link.neolink.datamonster;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the LICENSE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Java SQL stuff
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

// Hadoop general imports
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Tool;

// MapReduce imports
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

// HBase imports
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * A MapReduce abstraction class for SQL and HBase-oriented jobs.
 *
 * <p>Accepts SQL and HBase for both input and output. SQL output can be INSERT, UPDATE,
 * or the so-called DELINSERT, which acts like INSERT but empties the output table
 * (via {@code DELETE FROM}, not {@code TRUNCATE}) before running the job.
 * UPDATE output goes through {@link DBUpdateOutputFormat}.
 * It doesn't support HBase filters for now (don't hesitate to add it ;) )
 */
public class Datamonster extends Configured implements Tool {
    // Some may argue that i should rename MAGIC as MOREMAGIC...
    // MAGIC means "not configured yet"; input/output must leave this state before run().
    public enum Type {
        MAGIC, SELECT, UPDATE, INSERT, DELINSERT, HBASE
    };

    // The (italian) job
    private Job job = null;

    // General configuration
    private Type inType = Type.MAGIC;
    private Type outType = Type.MAGIC;

    // Writable classes: dataWritable travels mapper -> reducer; inputWritable is SQL -> mapper;
    // outputWritable is reducer -> SQL; ioWritable is a shorthand used for both directions.
    private Class<?> dataWritable = null;
    private Class<? extends DBWritable> inputWritable = null;
    private Class<? extends DBWritable> outputWritable = null;
    private Class<? extends DBWritable> ioWritable = null;

    // SQL driver/type and credentials, i should replace this someday...
    private String sqlDriver = "com.mysql.jdbc.Driver";
    private String sqlURLType = "mysql";

    // SQL-specific configuration
    private String outTable = "";
    private DBConfiguration sqlConfig = null;

    /**
     * Create the job and set its name.
     *
     * @param name
     *    The job's name
     * @throws Exception
     *    if the underlying Job cannot be created
     */
    public Datamonster(String name) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        this.job = Job.getInstance(conf, name);
        // super.getClass() is identical to getClass() (both return the runtime class);
        // use the plain form to avoid suggesting otherwise.
        this.job.setJarByClass(getClass());
    }

    /**
     * Get the job.
     *
     * @return the underlying MapReduce job
     */
    public Job getJob() {
        return this.job;
    }

    /**
     * Get the job configuration.
     *
     * @return the job's Configuration
     */
    public Configuration getJobConf() {
        return this.job.getConfiguration();
    }

    /**
     * Set the SQL credentials.
     *
     * @param URL
     *    The SQL URI, in JDBC format
     * @param user
     *    The SQL username
     * @param password
     *    The SQL password
     */
    public void setSQLCredentials(String URL, String user, String password) {
        DBConfiguration.configureDB(job.getConfiguration(), this.sqlDriver, URL, user, password);
        this.sqlConfig = new DBConfiguration(job.getConfiguration());
    }

    /**
     * Set the SQL credentials, building the JDBC URL from host and database.
     *
     * @param host
     *    The SQL host
     * @param database
     *    The SQL database
     * @param user
     *    The SQL username
     * @param password
     *    The SQL password
     */
    public void setSQLCredentials(String host, String database, String user, String password) {
        this.setSQLCredentials("jdbc:" + this.sqlURLType + "://" + host + "/" + database, user, password);
    }

    /**
     * Configure the JDBC driver and URL scheme.
     *
     * @param driver
     *    The SQL driver, default is com.mysql.jdbc.Driver
     * @param URLType
     *    The JDBC URL scheme (e.g. "mysql"), used when building URLs from host/database
     */
    public void setSQLDriver(String driver, String URLType) {
        this.sqlDriver = driver;
        this.sqlURLType = URLType;
    }

    /**
     * Configure the JDBC driver.
     *
     * @param driver
     *    The SQL driver, default is com.mysql.jdbc.Driver
     */
    public void setSQLDriver(String driver) {
        this.sqlDriver = driver;
    }

    /**
     * Get a SQL Connection object.
     *
     * @return an open JDBC connection (caller is responsible for closing it)
     * @throws IOException
     *    wrapping any driver-loading or SQL failure; the original exception is
     *    preserved as the cause instead of being flattened to its message
     */
    public Connection getSQLConnection() throws IOException {
        try {
            return this.sqlConfig.getConnection();
        } catch (ClassNotFoundException | SQLException ex) {
            // Keep the full cause chain for diagnosability.
            throw new IOException(ex);
        }
    }

    /**
     * Writables configuration.
     *
     * @param dataWritable
     *    The writable that goes between the mapper and the reducer
     */
    public void setWritables(Class<?> dataWritable) {
        this.dataWritable = dataWritable;
    }

    /**
     * Writables configuration.
     *
     * @param dataWritable
     *    The writable that goes between the mapper and the reducer
     * @param ioWritable
     *    The writable that goes either between the SQL and the mapper or the reducer and SQL
     */
    public void setWritables(Class<?> dataWritable, Class<? extends DBWritable> ioWritable) {
        this.dataWritable = dataWritable;
        this.ioWritable = ioWritable;
    }

    /**
     * Writables configuration.
     *
     * @param dataWritable
     *    The Writable that goes between the mapper and the reducer
     * @param inputWritable
     *    The Writable that goes between the SQL and the mapper
     * @param outputWritable
     *    The Writable that goes between the reducer and the SQL
     */
    public void setWritables(Class<?> dataWritable, Class<? extends DBWritable> inputWritable,
            Class<? extends DBWritable> outputWritable) {
        this.dataWritable = dataWritable;
        this.inputWritable = inputWritable;
        this.outputWritable = outputWritable;
    }

    /**
     * Convert a string to a Type and store it as the output type.
     *
     * @param outType
     *    The output type, as a string: "INSERT", "UPDATE" or "DELINSERT".
     *    Anything else only prints a warning and leaves the type unchanged.
     */
    private void setOutType(String outType) {
        if (outType.equals("INSERT")) {
            this.outType = Type.INSERT;
        } else if (outType.equals("UPDATE")) {
            this.outType = Type.UPDATE;
        } else if (outType.equals("DELINSERT")) {
            this.outType = Type.DELINSERT;
        } else {
            System.out.println("Warning: Your SQL OutType is invalid");
        }
    }

    /**
     * Mapper configuration, with a dataset from SQL.
     *
     * <p>Requires setWritables() to have been called first; exits the JVM on
     * misconfiguration (existing behavior kept for compatibility).
     *
     * @param query
     *    The SQL Query
     * @param mapper
     *    The mapper class
     */
    public void mapperConfiguration(String query, Class<? extends Mapper> mapper) {
        if (this.inType != Type.MAGIC) {
            System.err.println("Input type already defined.");
            System.exit(-1);
        }
        if (this.dataWritable == null) {
            System.err.println("No DataWritable(s) found, you need to call setDataWritable() before!.");
            System.exit(-1);
        }
        this.inType = Type.SELECT;

        this.job.setMapperClass(mapper);
        this.job.setMapOutputKeyClass(Text.class);
        this.job.setMapOutputValueClass(this.dataWritable);
        this.job.setInputFormatClass(DBInputFormat.class);

        // The count query wraps the user query so DBInputFormat can size its splits.
        if (this.inputWritable != null) {
            DBInputFormat.setInput(this.job, this.inputWritable, query,
                "SELECT COUNT(*) FROM (" + query + ") AS query_count");
        } else if (this.ioWritable != null) {
            DBInputFormat.setInput(this.job, this.ioWritable, query,
                "SELECT COUNT(*) FROM (" + query + ") AS query_count");
        } else {
            System.err.println("Input is SQL, but no input Writable class defined");
            System.exit(-1);
        }
    }

    /**
     * Mapper configuration, with a dataset from HBase.
     *
     * @param inputTable
     *    The HBase table
     * @param families
     *    Input table's families
     * @param mapper
     *    The mapper class
     * @throws Exception
     *    if the table mapper job cannot be initialized
     */
    public void mapperConfiguration(String inputTable, String[] families,
            Class<? extends TableMapper> mapper) throws Exception {
        if (this.inType != Type.MAGIC) {
            System.err.println("Input type already defined");
            System.exit(-1);
        }
        if (this.dataWritable == null) {
            System.err.println("No DataWritable(s) found, you need to call setDataWritable() before!.");
            System.exit(-1);
        }
        this.inType = Type.HBASE;

        // Add families to the scan
        Scan scan = new Scan();
        for (String family : families) {
            scan.addFamily(Bytes.toBytes(family));
        }

        TableMapReduceUtil.initTableMapperJob(inputTable, scan, mapper, Text.class,
            this.dataWritable, this.job);
    }

    /**
     * Reducer configuration, with SQL.
     *
     * @param outType
     *    Kind of output query, either "INSERT", "UPDATE" or "DELINSERT"
     * @param outTable
     *    The output table
     * @param outFields
     *    The output table's fields
     * @param reducer
     *    The reducer class
     * @throws Exception
     *    if the output format cannot be configured
     */
    public void reducerConfiguration(String outType, String outTable, String[] outFields,
            Class<? extends Reducer> reducer) throws Exception {
        if (this.outType != Type.MAGIC) {
            System.err.println("Output type already defined");
            System.exit(-1);
        }
        this.job.setReducerClass(reducer);
        this.setOutType(outType);
        job.setOutputValueClass(NullWritable.class);
        this.outTable = outTable;

        if (this.outputWritable != null) {
            job.setOutputKeyClass(this.outputWritable);
        } else if (this.ioWritable != null) {
            job.setOutputKeyClass(this.ioWritable);
        } else {
            System.err.println("Output is SQL, but no output Writable class defined");
            System.exit(-1);
        }

        // Define the OutputFormatClass (it's not the same between an INSERT/DELINSERT and an UPDATE)
        if (this.outType == Type.INSERT || this.outType == Type.DELINSERT) {
            this.job.setOutputFormatClass(DBOutputFormat.class);
        } else if (this.outType == Type.UPDATE) {
            this.job.setOutputFormatClass(DBUpdateOutputFormat.class);
        }

        // Configure the output format (same thing, it differs between INSERT and UPDATE)
        if (this.outType == Type.INSERT || this.outType == Type.DELINSERT) {
            DBOutputFormat.setOutput(job, outTable, outFields);
        } else if (this.outType == Type.UPDATE) {
            DBUpdateOutputFormat.setOutput(job, outTable, outFields);
        }
    }

    /**
     * Reducer configuration, for SQL UPDATE with conditional fields.
     *
     * @param outType
     *    Kind of output query; must be "UPDATE" (conditions are UPDATE-only)
     * @param outTable
     *    The output table
     * @param outFields
     *    The output table's fields
     * @param outConditions
     *    The output table's conditions (WHERE fields)
     * @param reducer
     *    The reducer class
     * @throws Exception
     *    if the output format cannot be configured
     */
    public void reducerConfiguration(String outType, String outTable, String[] outFields,
            String[] outConditions, Class<? extends Reducer> reducer) throws Exception {
        if (this.outType != Type.MAGIC) {
            System.err.println("Output type already defined!");
            System.exit(-1);
        }
        if (!outType.equals("UPDATE")) {
            System.err.println("Conditional fields are for UPDATEs only");
            System.exit(-1);
        }
        this.job.setReducerClass(reducer);
        this.setOutType(outType);
        this.outTable = outTable;

        if (this.outputWritable != null) {
            job.setOutputKeyClass(this.outputWritable);
        } else if (this.ioWritable != null) {
            job.setOutputKeyClass(this.ioWritable);
        } else {
            System.err.println("Output is SQL, but no output Writable class defined");
            System.exit(-1);
        }
        job.setOutputValueClass(NullWritable.class);

        this.job.setOutputFormatClass(DBUpdateOutputFormat.class);
        DBUpdateOutputFormat.setOutput(job, outTable, outFields, outConditions);
    }

    /**
     * Reducer configuration, for HBase.
     *
     * @param outTable
     *    The output table
     * @param reducer
     *    The reducer class
     * @throws Exception
     *    if the table reducer job cannot be initialized
     */
    public void reducerConfiguration(String outTable, Class<? extends TableReducer> reducer)
            throws Exception {
        if (this.outType != Type.MAGIC) {
            System.err.println("Output type already defined");
            System.exit(-1);
        }
        this.outType = Type.HBASE;
        TableMapReduceUtil.initTableReducerJob(outTable, reducer, job);
    }

    /**
     * Validate the configuration, optionally empty the output table (DELINSERT),
     * then run the job and wait for it.
     *
     * @param args
     *    unused
     * @return 0 on success, 1 on job failure, -1 when input/output types are unset
     * @throws Exception
     *    on SQL or job-submission failure
     */
    @Override
    public int run(String[] args) throws Exception {
        if (inType == Type.MAGIC || outType == Type.MAGIC) {
            System.err.println("Input and/or output type not defined");
            return -1;
        }
        if (inType != Type.HBASE && outType != Type.HBASE && this.sqlConfig == null) {
            System.err.println("No SQL information provided.");
            System.exit(-1);
        }

        // If you have both input and output as SQL, and only configured ioWritable, you gonna have a bad time.
        if (inType != Type.HBASE && outType != Type.HBASE
                && (this.inputWritable == null || this.outputWritable == null)) {
            System.out.println(
                "Warning: Both input and output are SQL, but the Writable classes aren't correctly defined, maybe you only defined one class?");
        }

        // Do an armageddon on the output table (if we do a DELINSERT).
        // try-with-resources closes the Connection and Statement; the previous
        // chained call leaked both. NOTE(review): outTable is concatenated into the
        // statement — it must come from trusted configuration, never user input.
        if (this.outType == Type.DELINSERT) {
            try (Connection conn = this.sqlConfig.getConnection();
                 Statement stmt = conn.createStatement()) {
                stmt.executeUpdate("DELETE FROM " + outTable);
            }
        }

        // Run and wait for the job
        return this.job.waitForCompletion(true) ? 0 : 1;
    }
}