Java tutorial
/* * QCRI, NADEEF LICENSE * NADEEF is an extensible, generalized and easy-to-deploy data cleaning platform built at QCRI. * NADEEF means "Clean" in Arabic * * Copyright (c) 2011-2013, Qatar Foundation for Education, Science and Community Development (on * behalf of Qatar Computing Research Institute) having its principle place of business in Doha, * Qatar with the registered address P.O box 5825 Doha, Qatar (hereinafter referred to as "QCRI") * * NADEEF has patent pending nevertheless the following is granted. * NADEEF is released under the terms of the MIT License, (http://opensource.org/licenses/MIT). */ package qa.qcri.nadeef.core.util; import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.io.Files; import qa.qcri.nadeef.core.util.sql.DBConnectionPool; import qa.qcri.nadeef.core.util.sql.DBMetaDataTool; import qa.qcri.nadeef.core.util.sql.SQLDialectBase; import qa.qcri.nadeef.tools.DBConfig; import qa.qcri.nadeef.tools.Tracer; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; import java.sql.Statement; import java.util.HashSet; import java.util.List; import java.util.concurrent.TimeUnit; /** * CSVTools is a simple tool which dumps CSV data into database given a table name. */ public class CSVTools { // <editor-fold desc="Public methods"> /** * Reads the content from CSV file. * @param file CSV file. * @param separator separator. * @return a list of tokens (the header line is skipped). */ public static List<String[]> read(File file, String separator) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(file)); List<String[]> result = Lists.newArrayList(); String line; int count = 0; while ((line = reader.readLine()) != null) { if (Strings.isNullOrEmpty(line)) { continue; } count++; // skip the header if (count == 1) { continue; } String[] tokens = line.split(separator); result.add(tokens); } return result; } /** * Append CSV file content into a database table. * @param tableName target table name. * @param dbConfig DB connection config. * @param file CSV file. * @return new created table name. */ public static HashSet<Integer> append(DBConfig dbConfig, SQLDialectBase dialectManager, String tableName, File file) throws Exception { Preconditions.checkNotNull(dbConfig); Preconditions.checkNotNull(dialectManager); Tracer tracer = Tracer.getTracer(CSVTools.class); Stopwatch stopwatch = Stopwatch.createStarted(); HashSet<Integer> result = Sets.newHashSet(); try { boolean hasTableExist = DBMetaDataTool.isTableExist(dbConfig, tableName); // Create table if (!hasTableExist) { throw new IllegalArgumentException("Table " + tableName + " does not exist."); } // get the current max tid. int startTid = DBMetaDataTool.getMaxTid(dbConfig, tableName) + 1; // load the data int size = 0; if (dialectManager.supportBulkLoad()) { size = dialectManager.bulkLoad(dbConfig, tableName, file.toPath(), true); } else { size = dialectManager.fallbackLoad(dbConfig, tableName, file, true); } tracer.info("Appended " + size + " bytes in " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms."); stopwatch.stop(); // build the tid set. int endTid = DBMetaDataTool.getMaxTid(dbConfig, tableName); for (int i = startTid; i <= endTid; i++) { result.add(i); } } catch (Exception ex) { tracer.err("Cannot load file " + file.getName(), ex); } return result; } /** * Dumps CSV file content into a database with default schema name and generated table name. * @param dbConfig DB connection config. * @param dialectManager SQL dialect manager. * @param file CSV file. * @return new created table name. */ public static String dump(DBConfig dbConfig, SQLDialectBase dialectManager, File file) throws IllegalAccessException, SQLException, IOException { String fileName = Files.getNameWithoutExtension(file.getName()); String tableName = dump(dbConfig, dialectManager, file, fileName, true); return tableName; } /** * Dumps CSV file content into a specified database. It replaces the table if the table * already existed. * @param dbConfig JDBC connection config. * @param file CSV file. * @param dialectManager SQL dialect manager. * @param tableName new created table name. * @param overwrite it overwrites existing table if it exists. * * @return new created table name. */ public static String dump(DBConfig dbConfig, SQLDialectBase dialectManager, File file, String tableName, boolean overwrite) throws SQLException { Preconditions.checkNotNull(dbConfig); Preconditions.checkNotNull(dialectManager); Tracer tracer = Tracer.getTracer(CSVTools.class); Stopwatch stopwatch = Stopwatch.createStarted(); String fullTableName = null; String sql; BufferedReader reader = null; try { // overwrites existing tables if necessary fullTableName = "TB_" + tableName; boolean hasTableExist = DBMetaDataTool.isTableExist(dbConfig, fullTableName); // Create table if (hasTableExist && !overwrite) { tracer.info("Found table " + fullTableName + " exists and choose not to overwrite."); return fullTableName; } else { Connection conn = null; Statement stat = null; try { conn = DBConnectionPool.createConnection(dbConfig, true); stat = conn.createStatement(); if (hasTableExist && overwrite) { sql = dialectManager.dropTable(fullTableName); tracer.verbose(sql); stat.execute(sql); } reader = new BufferedReader(new FileReader(file)); // TODO: check whether the header exists. String line; while (Strings.isNullOrEmpty(line = reader.readLine())) ; sql = dialectManager.createTableFromCSV(fullTableName, line); tracer.verbose(sql); stat.execute(sql); tracer.info("Successfully created table " + fullTableName); } finally { if (stat != null) { stat.close(); } if (conn != null) { conn.close(); } } // load the data int size = 0; if (dialectManager.supportBulkLoad()) { size = dialectManager.bulkLoad(dbConfig, fullTableName, file.toPath(), true); } else { size = dialectManager.fallbackLoad(dbConfig, fullTableName, file, true); } tracer.info("Dumped " + size + " bytes in " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms."); stopwatch.stop(); } } catch (Exception ex) { tracer.err("Cannot load file " + file.getName(), ex); } finally { try { if (reader != null) { reader.close(); } } catch (Exception ex) { // ignore } } return fullTableName; } // </editor-fold> }