/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.tool;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.ResourceTool;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeDescManager;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.job.dao.ExecutableDao;
import org.apache.kylin.job.dao.ExecutablePO;
import org.apache.kylin.job.exception.PersistentException;
import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.badquery.BadQueryHistoryManager;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TableRef;
import org.apache.kylin.metadata.project.ProjectInstance;
import org.apache.kylin.metadata.project.ProjectManager;
import org.apache.kylin.metadata.project.RealizationEntry;
import org.apache.kylin.metadata.realization.IRealization;
import org.apache.kylin.metadata.realization.RealizationRegistry;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.metadata.realization.RealizationType;
import org.apache.kylin.metadata.streaming.StreamingConfig;
import org.apache.kylin.metadata.streaming.StreamingManager;
import org.apache.kylin.source.kafka.config.KafkaConfig;
import org.apache.kylin.storage.hybrid.HybridInstance;
import org.apache.kylin.storage.hybrid.HybridManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * Extracts cube-related metadata (projects, models, cube descs, segments,
 * optional job info) for debugging/distributing purposes.
 *
 * <p>The tool walks the selected realizations (by cube, hybrid, project or
 * all projects), collects the resource-store paths they depend on, and copies
 * those resources into the export directory. Cubes extracted without segments
 * are trimmed (segments cleared, status set to DISABLED) before being saved.
 */
public class CubeMetaExtractor extends AbstractInfoExtractor {

    private static final Logger logger = LoggerFactory.getLogger(CubeMetaExtractor.class);

    @SuppressWarnings("static-access")
    private static final Option OPTION_CUBE = OptionBuilder.withArgName("cube").hasArg().isRequired(false)
            .withDescription("Specify which cube to extract").create("cube");
    @SuppressWarnings("static-access")
    private static final Option OPTION_HYBRID = OptionBuilder.withArgName("hybrid").hasArg().isRequired(false)
            .withDescription("Specify which hybrid to extract").create("hybrid");
    @SuppressWarnings("static-access")
    private static final Option OPTION_PROJECT = OptionBuilder.withArgName("project").hasArg().isRequired(false)
            .withDescription("Specify realizations in which project to extract").create("project");
    @SuppressWarnings("static-access")
    private static final Option OPTION_All_PROJECT = OptionBuilder.withArgName("allProjects").hasArg(false)
            .isRequired(false).withDescription("Specify realizations in all projects to extract")
            .create("allProjects");
    @SuppressWarnings("static-access")
    private static final Option OPTION_STORAGE_TYPE = OptionBuilder.withArgName("storageType").hasArg()
            .isRequired(false)
            .withDescription("Specify the storage type to overwrite. Default is empty, keep origin.")
            .create("storageType");
    @SuppressWarnings("static-access")
    private static final Option OPTION_ENGINE_TYPE = OptionBuilder.withArgName("engineType").hasArg()
            .isRequired(false)
            .withDescription("Specify the engine type to overwrite. Default is empty, keep origin.")
            .create("engineType");
    @SuppressWarnings("static-access")
    private static final Option OPTION_INCLUDE_SEGMENTS = OptionBuilder.withArgName("includeSegments").hasArg()
            .isRequired(false).withDescription("set this to true if want extract the segments info. Default true")
            .create("includeSegments");
    @SuppressWarnings("static-access")
    private static final Option OPTION_INCLUDE_JOB = OptionBuilder.withArgName("includeJobs").hasArg()
            .isRequired(false)
            .withDescription("set this to true if want to extract job info/outputs too. Default false")
            .create("includeJobs");
    @SuppressWarnings("static-access")
    private static final Option OPTION_INCLUDE_ONLY_JOB_OUTPUT = OptionBuilder.withArgName("onlyOutput").hasArg()
            .isRequired(false).withDescription("when include jobs, only extract output of job. Default true")
            .create("onlyOutput");
    @SuppressWarnings("static-access")
    private static final Option OPTION_INCLUDE_SEGMENT_DETAILS = OptionBuilder.withArgName("includeSegmentDetails")
            .hasArg().isRequired(false)
            .withDescription(
                    "set this to true if want to extract segment details too, such as dict, tablesnapshot. Default false")
            .create("includeSegmentDetails");

    private KylinConfig kylinConfig;
    private MetadataManager metadataManager;
    private ProjectManager projectManager;
    private HybridManager hybridManager;
    private CubeManager cubeManager;
    private StreamingManager streamingManager;
    private CubeDescManager cubeDescManager;
    private ExecutableDao executableDao;
    private RealizationRegistry realizationRegistry;
    private BadQueryHistoryManager badQueryHistoryManager;

    private boolean includeSegments;
    private boolean includeJobs;
    private boolean includeSegmentDetails;
    private boolean onlyJobOutput;
    private String storageType = null;
    private String engineType = null;

    // Resources that must be present for the extract to be usable.
    private Set<String> requiredResources = Sets.newLinkedHashSet();
    // Resources that are nice to have; copy failures are logged and skipped.
    private Set<String> optionalResources = Sets.newLinkedHashSet();
    private Set<CubeInstance> cubesToTrimAndSave = Sets.newLinkedHashSet();//these cubes needs to be saved skipping segments

    public CubeMetaExtractor() {
        super();

        packageType = "cubemeta";

        // Exactly one of cube/project/hybrid/allProjects must be given.
        OptionGroup realizationOrProject = new OptionGroup();
        realizationOrProject.addOption(OPTION_CUBE);
        realizationOrProject.addOption(OPTION_PROJECT);
        realizationOrProject.addOption(OPTION_HYBRID);
        realizationOrProject.addOption(OPTION_All_PROJECT);
        realizationOrProject.setRequired(true);

        options.addOptionGroup(realizationOrProject);
        options.addOption(OPTION_INCLUDE_SEGMENTS);
        options.addOption(OPTION_INCLUDE_JOB);
        options.addOption(OPTION_INCLUDE_SEGMENT_DETAILS);
        options.addOption(OPTION_INCLUDE_ONLY_JOB_OUTPUT);
        options.addOption(OPTION_STORAGE_TYPE);
        options.addOption(OPTION_ENGINE_TYPE);
    }

    @Override
    protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
        // Parse flags, falling back to documented defaults when absent.
        includeSegments = optionsHelper.hasOption(OPTION_INCLUDE_SEGMENTS)
                ? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_SEGMENTS))
                : true;
        includeJobs = optionsHelper.hasOption(OPTION_INCLUDE_JOB)
                ? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_JOB))
                : false;
        includeSegmentDetails = optionsHelper.hasOption(OPTION_INCLUDE_SEGMENT_DETAILS)
                ? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_SEGMENT_DETAILS))
                : false;
        onlyJobOutput = optionsHelper.hasOption(OPTION_INCLUDE_ONLY_JOB_OUTPUT)
                ? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_ONLY_JOB_OUTPUT))
                : true;
        storageType = optionsHelper.hasOption(OPTION_STORAGE_TYPE) ? optionsHelper.getOptionValue(OPTION_STORAGE_TYPE)
                : null;
        engineType = optionsHelper.hasOption(OPTION_ENGINE_TYPE) ? optionsHelper.getOptionValue(OPTION_ENGINE_TYPE)
                : null;

        kylinConfig = KylinConfig.getInstanceFromEnv();
        metadataManager = MetadataManager.getInstance(kylinConfig);
        projectManager = ProjectManager.getInstance(kylinConfig);
        hybridManager = HybridManager.getInstance(kylinConfig);
        cubeManager = CubeManager.getInstance(kylinConfig);
        cubeDescManager = CubeDescManager.getInstance(kylinConfig);
        executableDao = ExecutableDao.getInstance(kylinConfig);
        realizationRegistry = RealizationRegistry.getInstance(kylinConfig);
        badQueryHistoryManager = BadQueryHistoryManager.getInstance(kylinConfig);

        addRequired(ResourceStore.METASTORE_UUID_TAG);

        if (optionsHelper.hasOption(OPTION_All_PROJECT)) {
            for (ProjectInstance projectInstance : projectManager.listAllProjects()) {
                requireProject(projectInstance);
            }
        } else if (optionsHelper.hasOption(OPTION_PROJECT)) {
            String projectNames = optionsHelper.getOptionValue(OPTION_PROJECT);
            for (String projectName : projectNames.split(",")) {
                ProjectInstance projectInstance = projectManager.getProject(projectName);
                Preconditions.checkNotNull(projectInstance, "Project " + projectName + " does not exist.");
                requireProject(projectInstance);
            }
        } else if (optionsHelper.hasOption(OPTION_CUBE)) {
            String cubeNames = optionsHelper.getOptionValue(OPTION_CUBE);
            for (String cubeName : cubeNames.split(",")) {
                IRealization realization = cubeManager.getRealization(cubeName);
                if (realization == null) {
                    throw new IllegalArgumentException("No cube found with name of " + cubeName);
                } else {
                    retrieveResourcePath(realization);
                }
            }
        } else if (optionsHelper.hasOption(OPTION_HYBRID)) {
            String hybridNames = optionsHelper.getOptionValue(OPTION_HYBRID);
            for (String hybridName : hybridNames.split(",")) {
                IRealization realization = hybridManager.getRealization(hybridName);
                if (realization != null) {
                    retrieveResourcePath(realization);
                } else {
                    // BUGFIX: original message lacked a space before the name.
                    throw new IllegalArgumentException("No hybrid found with name of " + hybridName);
                }
            }
        }

        executeExtraction(exportDir.getAbsolutePath());
        engineOverwrite(new File(exportDir.getAbsolutePath()));
    }

    /**
     * Registers all resources a project depends on: the project itself,
     * every realization it contains, its models, and (optionally) its
     * bad-query history.
     */
    private void requireProject(ProjectInstance projectInstance) throws IOException {
        addRequired(projectInstance.getResourcePath());
        List<RealizationEntry> realizationEntries = projectInstance.getRealizationEntries();
        for (RealizationEntry realizationEntry : realizationEntries) {
            retrieveResourcePath(getRealization(realizationEntry));
        }
        List<DataModelDesc> modelDescs = metadataManager.getModels(projectInstance.getName());
        for (DataModelDesc modelDesc : modelDescs) {
            addRequired(DataModelDesc.concatResourcePath(modelDesc.getName()));
        }
        addOptional(badQueryHistoryManager.getBadQueriesForProject(projectInstance.getName()).getResourcePath());
    }

    /**
     * Copies the collected resources into the destination store. Required
     * resources must copy successfully; optional ones are best-effort. Cubes
     * queued in {@link #cubesToTrimAndSave} are saved with their segment list
     * cleared.
     */
    private void executeExtraction(String dest) {
        logger.info("The resource paths going to be extracted:");
        for (String s : requiredResources) {
            logger.info(s + "(required)");
        }
        for (String s : optionalResources) {
            logger.info(s + "(optional)");
        }
        for (CubeInstance cube : cubesToTrimAndSave) {
            logger.info("Cube {} will be trimmed and extracted", cube);
        }

        try {
            KylinConfig srcConfig = KylinConfig.getInstanceFromEnv();
            KylinConfig dstConfig = KylinConfig.createInstanceFromUri(dest);

            ResourceTool.copy(srcConfig, dstConfig, Lists.newArrayList(requiredResources), true);

            for (String r : optionalResources) {
                try {
                    ResourceTool.copy(srcConfig, dstConfig, Lists.newArrayList(r), true);
                } catch (Exception e) {
                    // Best-effort: optional resources may legitimately be absent.
                    logger.warn(
                            "Exception when copying optional resource {}. May be caused by resource missing. skip it.",
                            r);
                }
            }

            ResourceStore dstStore = ResourceStore.getStore(dstConfig);
            for (CubeInstance cube : cubesToTrimAndSave) {
                CubeInstance trimmedCube = CubeInstance.getCopyOf(cube);
                trimmedCube.getSegments().clear(); // strip segment info; the extract carries none
                trimmedCube.setUuid(cube.getUuid());
                dstStore.putResource(trimmedCube.getResourcePath(), trimmedCube, CubeManager.CUBE_SERIALIZER);
            }
        } catch (Exception e) {
            throw new RuntimeException("Exception", e);
        }
    }

    /**
     * Recursively rewrites engine_type/storage_type in all extracted JSON
     * files, when an overwrite was requested on the command line.
     */
    private void engineOverwrite(File dest) throws IOException {
        if (engineType != null || storageType != null) {
            // BUGFIX: listFiles() returns null on I/O error or non-directory;
            // guard against NPE instead of crashing mid-extract.
            File[] children = dest.listFiles();
            if (children == null) {
                return;
            }
            for (File f : children) {
                if (f.isDirectory()) {
                    engineOverwrite(f);
                } else {
                    engineOverwriteInternal(f);
                }
            }
        }
    }

    /**
     * Rewrites engine_type/storage_type in a single JSON file, if present.
     * Non-JSON files are skipped with a warning.
     */
    private void engineOverwriteInternal(File f) throws IOException {
        try {
            ObjectMapper objectMapper = new ObjectMapper();
            JsonNode rootNode = objectMapper.readTree(f);
            boolean replaced = false;
            if (engineType != null && rootNode.get("engine_type") != null) {
                ((ObjectNode) rootNode).put("engine_type", Integer.parseInt(engineType));
                replaced = true;
            }
            if (storageType != null && rootNode.get("storage_type") != null) {
                ((ObjectNode) rootNode).put("storage_type", Integer.parseInt(storageType));
                replaced = true;
            }
            if (replaced) {
                objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
                objectMapper.writeValue(f, rootNode);
            }
        } catch (JsonProcessingException ex) {
            // Extracted dirs may contain non-JSON files (e.g. dict/snapshot blobs).
            logger.warn("cannot parse file {}", f);
        }
    }

    private IRealization getRealization(RealizationEntry realizationEntry) {
        return realizationRegistry.getRealization(realizationEntry.getType(), realizationEntry.getRealization());
    }

    /**
     * Registers the streaming + Kafka configs whose name matches the cube's
     * root fact table (Kylin's convention for streaming sources).
     */
    private void dealWithStreaming(CubeInstance cube) {
        streamingManager = StreamingManager.getInstance(kylinConfig);
        for (StreamingConfig streamingConfig : streamingManager.listAllStreaming()) {
            if (streamingConfig.getName() != null
                    && streamingConfig.getName().equalsIgnoreCase(cube.getRootFactTable())) {
                addRequired(StreamingConfig.concatResourcePath(streamingConfig.getName()));
                addRequired(KafkaConfig.concatResourcePath(streamingConfig.getName()));
            }
        }
    }

    /**
     * Collects all resource paths a realization depends on. Cubes pull in
     * their desc, model, tables, streaming configs and (optionally) READY
     * segments plus job info; hybrids recurse into their child cubes.
     */
    private void retrieveResourcePath(IRealization realization) {
        if (realization == null) {
            return;
        }
        logger.info("Deal with realization {} of type {}", realization.getName(), realization.getType());
        if (realization instanceof CubeInstance) {
            CubeInstance cube = (CubeInstance) realization;
            String descName = cube.getDescName();
            CubeDesc cubeDesc = cubeDescManager.getCubeDesc(descName);
            String modelName = cubeDesc.getModelName();
            DataModelDesc modelDesc = metadataManager.getDataModelDesc(modelName);

            dealWithStreaming(cube);

            for (TableRef table : modelDesc.getAllTables()) {
                String tableName = table.getTableIdentity();
                addRequired(TableDesc.concatResourcePath(tableName));
                addOptional(TableDesc.concatExdResourcePath(tableName));
            }

            addRequired(DataModelDesc.concatResourcePath(modelDesc.getName()));
            addRequired(CubeDesc.concatResourcePath(cubeDesc.getName()));

            if (includeSegments) {
                addRequired(CubeInstance.concatResourcePath(cube.getName()));
                for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) {
                    addRequired(CubeSegment.getStatisticsResourcePath(cube.getName(), segment.getUuid()));
                    if (includeSegmentDetails) {
                        for (String dictPat : segment.getDictionaryPaths()) {
                            addRequired(dictPat);
                        }
                        for (String snapshotPath : segment.getSnapshotPaths()) {
                            addRequired(snapshotPath);
                        }
                    }

                    if (includeJobs) {
                        String lastJobId = segment.getLastBuildJobID();
                        if (StringUtils.isEmpty(lastJobId)) {
                            throw new RuntimeException("No job exist for segment :" + segment);
                        } else {
                            try {
                                if (onlyJobOutput) {
                                    // Sanity check that the job exists before registering its output.
                                    executableDao.getJob(lastJobId);
                                    addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + lastJobId);
                                } else {
                                    ExecutablePO executablePO = executableDao.getJob(lastJobId);
                                    addRequired(ResourceStore.EXECUTE_RESOURCE_ROOT + "/" + lastJobId);
                                    addRequired(ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + lastJobId);
                                    for (ExecutablePO task : executablePO.getTasks()) {
                                        addRequired(ResourceStore.EXECUTE_RESOURCE_ROOT + "/" + task.getUuid());
                                        addRequired(
                                                ResourceStore.EXECUTE_OUTPUT_RESOURCE_ROOT + "/" + task.getUuid());
                                    }
                                }
                            } catch (PersistentException e) {
                                throw new RuntimeException("PersistentException", e);
                            }
                        }
                    }
                }
            } else {
                if (includeJobs) {
                    logger.warn("It's useless to set includeJobs to true when includeSegments is set to false");
                }
                // Segment-less extract: disable the cube and queue it for trimming.
                cube.setStatus(RealizationStatusEnum.DISABLED);
                cubesToTrimAndSave.add(cube);
            }
        } else if (realization instanceof HybridInstance) {
            HybridInstance hybridInstance = (HybridInstance) realization;
            addRequired(HybridInstance.concatResourcePath(hybridInstance.getName()));
            for (IRealization iRealization : hybridInstance.getRealizations()) {
                if (iRealization.getType() != RealizationType.CUBE) {
                    throw new RuntimeException("Hybrid " + iRealization.getName() + " contains non cube child "
                            + iRealization.getName() + " with type " + iRealization.getType());
                }
                retrieveResourcePath(iRealization);
            }
        } else {
            // Use parameterized logging, consistent with the rest of the class.
            logger.warn("Unknown realization type: {}", realization.getType());
        }
    }

    private void addRequired(String record) {
        logger.info("adding required resource {}", record);
        requiredResources.add(record);
    }

    private void addOptional(String record) {
        logger.info("adding optional resource {}", record);
        optionalResources.add(record);
    }

    public static void main(String[] args) {
        CubeMetaExtractor extractor = new CubeMetaExtractor();
        extractor.execute(args);
    }
}