Java tutorial: Apache Drill's AnalyzeTableHandler, the handler that plans the ANALYZE TABLE statement
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.planner.sql.handlers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.schema.Table;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlSelect;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.tools.RelConversionException;
import org.apache.calcite.tools.ValidationException;

import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.exec.dotdrill.DotDrillType;
import org.apache.drill.exec.physical.PhysicalPlan;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.planner.common.DrillStatsTable;
import org.apache.drill.exec.planner.logical.DrillAnalyzeRel;
import org.apache.drill.exec.planner.logical.DrillProjectRel;
import org.apache.drill.exec.planner.logical.DrillRel;
import org.apache.drill.exec.planner.logical.DrillScanRel;
import org.apache.drill.exec.planner.logical.DrillScreenRel;
import org.apache.drill.exec.planner.logical.DrillStoreRel;
import org.apache.drill.exec.planner.logical.DrillTable;
import org.apache.drill.exec.planner.logical.DrillWriterRel;
import org.apache.drill.exec.planner.physical.Prel;
import org.apache.drill.exec.planner.sql.SchemaUtilites;
import org.apache.drill.exec.planner.sql.parser.SqlAnalyzeTable;
import org.apache.drill.exec.store.AbstractSchema;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.dfs.FileSystemPlugin;
import org.apache.drill.exec.store.dfs.FormatSelection;
import org.apache.drill.exec.store.dfs.NamedFormatPluginConfig;
import org.apache.drill.exec.store.parquet.ParquetFormatConfig;
import org.apache.drill.exec.util.Pointer;
import org.apache.drill.exec.work.foreman.ForemanSetupException;
import org.apache.drill.exec.work.foreman.SqlUnsupportedException;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class AnalyzeTableHandler extends DefaultSqlHandler {
  private static final org.slf4j.Logger logger =
      org.slf4j.LoggerFactory.getLogger(AnalyzeTableHandler.class);

  public AnalyzeTableHandler(SqlHandlerConfig config, Pointer<String> textPlan) {
    super(config, textPlan);
  }

  @Override
  public PhysicalPlan getPlan(SqlNode sqlNode)
      throws ValidationException, RelConversionException, IOException, ForemanSetupException {
    final SqlAnalyzeTable sqlAnalyzeTable = unwrap(sqlNode, SqlAnalyzeTable.class);

    verifyNoUnsupportedFunctions(sqlAnalyzeTable);

    SqlIdentifier tableIdentifier = sqlAnalyzeTable.getTableIdentifier();
    SqlSelect scanSql = new SqlSelect(
        SqlParserPos.ZERO,              /* position */
        SqlNodeList.EMPTY,              /* keyword list */
        getColumnList(sqlAnalyzeTable), /* select list */
        tableIdentifier,                /* from */
        null,                           /* where */
        null,                           /* group by */
        null,                           /* having */
        null,                           /* windowDecls */
        null,                           /* orderBy */
        null,                           /* offset */
        null                            /* fetch */
    );

    final ConvertedRelNode convertedRelNode = validateAndConvert(rewrite(scanSql));
    final RelDataType validatedRowType = convertedRelNode.getValidatedRowType();
    final RelNode relScan = convertedRelNode.getConvertedNode();
    final String tableName = sqlAnalyzeTable.getName();
    final AbstractSchema drillSchema = SchemaUtilites.resolveToDrillSchema(
        config.getConverter().getDefaultSchema(), sqlAnalyzeTable.getSchemaPath());
    Table table = SqlHandlerUtil.getTableFromSchema(drillSchema, tableName);

    if (table == null) {
      throw UserException.validationError()
          .message("No table with given name [%s] exists in schema [%s]",
              tableName, drillSchema.getFullSchemaName())
          .build(logger);
    }

    // Statistics collection is only supported for DrillTable instances
    if (!(table instanceof DrillTable)) {
      return DrillStatsTable.notSupported(context, tableName);
    }

    DrillTable drillTable = (DrillTable) table;
    final Object selection = drillTable.getSelection();
    if (!(selection instanceof FormatSelection)) {
      return DrillStatsTable.notSupported(context, tableName);
    }

    // Do not support non-parquet tables
    FormatSelection formatSelection = (FormatSelection) selection;
    FormatPluginConfig formatConfig = formatSelection.getFormat();
    if (!((formatConfig instanceof ParquetFormatConfig)
          || ((formatConfig instanceof NamedFormatPluginConfig)
              && ((NamedFormatPluginConfig) formatConfig).name.equals("parquet")))) {
      return DrillStatsTable.notSupported(context, tableName);
    }

    FileSystemPlugin plugin = (FileSystemPlugin) drillTable.getPlugin();
    DrillFileSystem fs = new DrillFileSystem(
        plugin.getFormatPlugin(formatSelection.getFormat()).getFsConf());

    Path selectionRoot = formatSelection.getSelection().getSelectionRoot();
    if (!selectionRoot.toUri().getPath().endsWith(tableName)
        || !fs.getFileStatus(selectionRoot).isDirectory()) {
      return DrillStatsTable.notSupported(context, tableName);
    }

    // Do not recompute statistics if the existing ones are still fresh
    Path statsFilePath = new Path(selectionRoot, DotDrillType.STATS.getEnding());
    if (fs.exists(statsFilePath) && !isStatsStale(fs, statsFilePath)) {
      return DrillStatsTable.notRequired(context, tableName);
    }

    // Convert the query to a Drill logical plan and insert a writer operator on top.
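    // The remaining conversions follow DefaultSqlHandler's usual pipeline:
    // logical plan (DrillRel) -> physical plan (Prel) -> physical operator
    // tree (PhysicalOperator) -> executable PhysicalPlan.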
    DrillRel drel = convertToDrel(relScan, drillSchema, tableName, sqlAnalyzeTable.getSamplePercent());
    Prel prel = convertToPrel(drel, validatedRowType);
    logAndSetTextPlan("Drill Physical", prel, logger);
    PhysicalOperator pop = convertToPop(prel);
    PhysicalPlan plan = convertToPlan(pop);
    log("Drill Plan", plan, logger);
    return plan;
  }

  /* Determines if the table was modified after statistics were computed,
   * based on directory/file modification timestamps. */
  private boolean isStatsStale(DrillFileSystem fs, Path statsFilePath) throws IOException {
    long statsFileModifyTime = fs.getFileStatus(statsFilePath).getModificationTime();
    Path parentPath = statsFilePath.getParent();
    FileStatus directoryStatus = fs.getFileStatus(parentPath);
    // Parent directory modified after stats collection?
    return directoryStatus.getModificationTime() > statsFileModifyTime
        || tableModified(fs, parentPath, statsFileModifyTime);
  }

  /* Determines if any file or sub-directory under the table root was modified
   * after the statistics were computed. Recursively checks sub-directories. */
  private boolean tableModified(DrillFileSystem fs, Path parentPath, long statsModificationTime)
      throws IOException {
    for (final FileStatus file : fs.listStatus(parentPath)) {
      // If the directory or a file within it was modified
      if (file.getModificationTime() > statsModificationTime) {
        return true;
      }
      // For a directory, recursively check its sub-directories
      if (file.isDirectory() && tableModified(fs, file.getPath(), statsModificationTime)) {
        return true;
      }
    }
    return false;
  }

  /* Generates the column list specified in the ANALYZE statement */
  private SqlNodeList getColumnList(final SqlAnalyzeTable sqlAnalyzeTable) {
    SqlNodeList columnList = sqlAnalyzeTable.getFieldList();
    if (columnList == null || columnList.size() <= 0) {
      // No columns specified: analyze all columns
      columnList = new SqlNodeList(SqlParserPos.ZERO);
      columnList.add(new SqlIdentifier(SchemaPath.STAR_COLUMN.rootName(), SqlParserPos.ZERO));
    }
    return columnList;
  }

  /* Converts to a Drill logical plan */
  protected DrillRel convertToDrel(RelNode relNode, AbstractSchema schema,
      String analyzeTableName, double samplePercent) throws SqlUnsupportedException {
    DrillRel convertedRelNode = convertToRawDrel(relNode);

    if (convertedRelNode instanceof DrillStoreRel) {
      throw new UnsupportedOperationException();
    }

    if (convertedRelNode instanceof DrillProjectRel) {
      DrillProjectRel projectRel = (DrillProjectRel) convertedRelNode;
      DrillScanRel scanRel = findScan(projectRel);
      RexBuilder b = projectRel.getCluster().getRexBuilder();
      List<RexNode> projections = Lists.newArrayList();
      // Get the original scan column names - after projection pushdown they
      // should refer to the full column names
      List<String> fieldNames = new ArrayList<>();
      List<RelDataTypeField> fieldTypes = projectRel.getRowType().getFieldList();
      for (SchemaPath colPath : scanRel.getGroupScan().getColumns()) {
        fieldNames.add(colPath.toString());
      }
      for (int i = 0; i < fieldTypes.size(); i++) {
        projections.add(b.makeInputRef(projectRel, i));
      }
      // Get the projection row-types
      RelDataType newRowType = RexUtil.createStructType(
          projectRel.getCluster().getTypeFactory(), projections,
          fieldNames, null);
      DrillProjectRel renamedProject = DrillProjectRel.create(
          convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
          convertedRelNode, projections, newRowType);
      convertedRelNode = renamedProject;
    }

    final RelNode analyzeRel = new DrillAnalyzeRel(
        convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
        convertedRelNode, samplePercent);
    final RelNode writerRel = new DrillWriterRel(
        analyzeRel.getCluster(), analyzeRel.getTraitSet(), analyzeRel,
        schema.appendToStatsTable(analyzeTableName));

    return new DrillScreenRel(writerRel.getCluster(), writerRel.getTraitSet(), writerRel);
  }

  private DrillScanRel findScan(RelNode rel) {
    if (rel instanceof DrillScanRel) {
      return (DrillScanRel) rel;
    } else {
      return findScan(rel.getInput(0));
    }
  }

  // Make sure no unsupported features in the ANALYZE statement are used
  private static void verifyNoUnsupportedFunctions(final SqlAnalyzeTable analyzeTable) {
    // Throw an unsupported error for functions that are not yet implemented
    if (analyzeTable.getEstimate()) {
      throw UserException.unsupportedError()
          .message("Statistics estimation is not yet supported.")
          .build(logger);
    }

    // A valid sampling percent must lie in (0, 100]
    if (analyzeTable.getSamplePercent() <= 0 || analyzeTable.getSamplePercent() > 100.0) {
      throw UserException.unsupportedError()
          .message("Valid sampling percent between 0-100 is not specified.")
          .build(logger);
    }
  }
}
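The staleness test above is the most reusable idea in this handler: statistics are considered stale as soon as any file or directory under the table root carries a modification time newer than the stats file. Below is a minimal, self-contained sketch of the same recursive check using plain java.nio instead of Drill's DrillFileSystem. The class name, the example path /tmp/orders, and the .stats.drill file name are illustrative assumptions for this sketch, not Drill API.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileTime;
import java.util.stream.Stream;

/* Minimal sketch, assuming a stats file stored inside the table directory,
 * as in the handler above. Uses java.nio rather than DrillFileSystem. */
public class StatsStalenessSketch {

  /* Returns true if any entry under the table directory was modified after
   * the stats file was written - the same rule as isStatsStale() plus
   * tableModified() above, with Files.walk() replacing the manual recursion. */
  static boolean isStale(Path statsFile) throws IOException {
    FileTime statsTime = Files.getLastModifiedTime(statsFile);
    Path tableDir = statsFile.getParent();
    try (Stream<Path> entries = Files.walk(tableDir)) {
      return entries
          .filter(p -> !p.equals(statsFile)) // the stats file itself does not count
          .anyMatch(p -> {
            try {
              return Files.getLastModifiedTime(p).compareTo(statsTime) > 0;
            } catch (IOException e) {
              throw new UncheckedIOException(e);
            }
          });
    }
  }

  public static void main(String[] args) throws IOException {
    // Hypothetical layout: /tmp/orders/.stats.drill next to the data files
    Path statsFile = Paths.get("/tmp/orders/.stats.drill");
    System.out.println("stale: " + isStale(statsFile));
  }
}

For reference, the statement that drives this handler has roughly the shape ANALYZE TABLE dfs.tmp.`orders` COMPUTE STATISTICS (col1, col2) SAMPLE 50 PERCENT: the optional column list feeds getColumnList() via getFieldList(), and the sample percent is validated in verifyNoUnsupportedFunctions() and passed into convertToDrel().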