ScanBenchmark: a CarbonData CLI command that times header/footer reads, footer conversion, and per-blocklet column chunk IO and page decompression for a CarbonData file
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.carbondata.tool;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.carbondata.common.Strings;
import org.apache.carbondata.core.datastore.block.BlockletInfos;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.datastore.chunk.AbstractRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory;
import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader;
import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.util.DataFileFooterConverterV3;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.format.FileFooter3;
import org.apache.carbondata.format.FileHeader;

import org.apache.commons.cli.CommandLine;

class ScanBenchmark implements Command {

  private String dataFolder;
  private DataFile file;
  private List<String> outPuts;

  ScanBenchmark(String dataFolder, List<String> outPuts) {
    this.dataFolder = dataFolder;
    this.outPuts = outPuts;
  }
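  /*
   * Entry point called by the CLI driver. The steps below are timed one by
   * one: pick a data file (the one given with -f, or the first file found
   * under the data folder), read the file header and footer, convert the
   * footer to the internal DataFileFooter form, and, if -c <column> is
   * given, read and decompress that column blocklet by blocklet.
   */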
  @Override
  public void run(CommandLine line) throws IOException, MemoryException {
    if (line.hasOption("f")) {
      String filePath = line.getOptionValue("f");
      file = new DataFile(FileFactory.getCarbonFile(filePath));
    } else {
      FileCollector collector = new FileCollector(outPuts);
      collector.collectFiles(dataFolder);
      if (collector.getNumDataFiles() == 0) {
        return;
      }
      Map<String, DataFile> dataFiles = collector.getDataFiles();
      Iterator<DataFile> iterator = dataFiles.values().iterator();
      // use the first file and close the rest
      file = iterator.next();
      while (iterator.hasNext()) {
        iterator.next().close();
      }
    }

    outPuts.add("\n## Benchmark");
    final AtomicReference<FileHeader> fileHeaderRef = new AtomicReference<>();
    final AtomicReference<FileFooter3> fileFooterRef = new AtomicReference<>();
    final AtomicReference<DataFileFooter> convertedFooterRef = new AtomicReference<>();

    // benchmark read header and footer time
    benchmarkOperation("ReadHeaderAndFooter", new Operation() {
      @Override
      public void run() throws IOException, MemoryException {
        fileHeaderRef.set(file.readHeader());
        fileFooterRef.set(file.readFooter());
      }
    });
    final FileHeader fileHeader = fileHeaderRef.get();
    final FileFooter3 fileFooter = fileFooterRef.get();

    // benchmark convert footer
    benchmarkOperation("ConvertFooter", new Operation() {
      @Override
      public void run() throws IOException, MemoryException {
        convertFooter(fileHeader, fileFooter);
      }
    });

    // benchmark read all meta and convert footer
    benchmarkOperation("ReadAllMetaAndConvertFooter", new Operation() {
      @Override
      public void run() throws IOException, MemoryException {
        DataFileFooter footer = readAndConvertFooter(file);
        convertedFooterRef.set(footer);
      }
    });

    if (line.hasOption("c")) {
      String columnName = line.getOptionValue("c");
      outPuts.add("\nScan column '" + columnName + "'");

      final DataFileFooter footer = convertedFooterRef.get();
      final AtomicReference<AbstractRawColumnChunk> columnChunk = new AtomicReference<>();
      final int columnIndex = file.getColumnIndex(columnName);
      final boolean dimension = file.getColumn(columnName).isDimensionColumn();
      for (int i = 0; i < footer.getBlockletList().size(); i++) {
        final int blockletId = i;
        outPuts.add(String.format("Blocklet#%d: total size %s, %,d pages, %,d rows", blockletId,
            Strings.formatSize(file.getColumnDataSizeInBytes(blockletId, columnIndex)),
            footer.getBlockletList().get(blockletId).getNumberOfPages(),
            footer.getBlockletList().get(blockletId).getNumberOfRows()));
        benchmarkOperation("\tColumnChunk IO", new Operation() {
          @Override
          public void run() throws IOException, MemoryException {
            columnChunk.set(readBlockletColumnChunkIO(footer, blockletId, columnIndex, dimension));
          }
        });
        // readBlockletColumnChunkIO sets dimensionColumnChunkReader only for
        // dimension columns, so this check is equivalent to 'if (dimension)'
        if (dimensionColumnChunkReader != null) {
          benchmarkOperation("\tDecompress Pages", new Operation() {
            @Override
            public void run() throws IOException, MemoryException {
              decompressDimensionPages(columnChunk.get(),
                  footer.getBlockletList().get(blockletId).getNumberOfPages());
            }
          });
        } else {
          benchmarkOperation("\tDecompress Pages", new Operation() {
            @Override
            public void run() throws IOException, MemoryException {
              decompressMeasurePages(columnChunk.get(),
                  footer.getBlockletList().get(blockletId).getNumberOfPages());
            }
          });
        }
      }
    }

    file.close();
  }

  interface Operation {
    void run() throws IOException, MemoryException;
  }

  private void benchmarkOperation(String opName, Operation op)
      throws IOException, MemoryException {
    long start, end;
    start = System.nanoTime();
    op.run();
    end = System.nanoTime();
    outPuts.add(String.format("%s takes %,d us", opName, (end - start) / 1000));
  }

  private DataFileFooter readAndConvertFooter(DataFile file) throws IOException {
    int numBlocklets = file.getNumBlocklet();
    BlockletInfos blockletInfos = new BlockletInfos(numBlocklets, 0, numBlocklets);
    String segmentId = CarbonTablePath.DataFileUtil.getSegmentNo(file.getFilePath());
    TableBlockInfo blockInfo = new TableBlockInfo(file.getFilePath(), file.getFooterOffset(),
        segmentId, new String[] { "localhost" }, file.getFileSizeInBytes(), blockletInfos,
        ColumnarFormatVersion.V3, new String[0]);
    DataFileFooterConverterV3 converter = new DataFileFooterConverterV3();
    return converter.readDataFileFooter(blockInfo);
  }

  private DataFileFooter convertFooter(FileHeader fileHeader, FileFooter3 fileFooter) {
    DataFileFooterConverterV3 converter = new DataFileFooterConverterV3();
    return converter.convertDataFileFooter(fileHeader, fileFooter);
  }

  private DimensionColumnChunkReader dimensionColumnChunkReader;
  private MeasureColumnChunkReader measureColumnChunkReader;
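  /*
   * The reader chosen during the ColumnChunk IO step is kept in one of the
   * two fields above so that the decompress step can reuse it to decode the
   * raw chunk it just fetched.
   */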
  private AbstractRawColumnChunk readBlockletColumnChunkIO(DataFileFooter footer, int blockletId,
      int columnIndex, boolean dimension) throws IOException {
    BlockletInfo blockletInfo = footer.getBlockletList().get(blockletId);
    if (dimension) {
      dimensionColumnChunkReader = CarbonDataReaderFactory.getInstance()
          .getDimensionColumnChunkReader(ColumnarFormatVersion.V3, blockletInfo,
              footer.getSegmentInfo().getColumnCardinality(), file.getFilePath(), false);
      return dimensionColumnChunkReader.readRawDimensionChunk(file.getFileReader(), columnIndex);
    } else {
      // measure chunk readers index measures from 0, so strip the dimension offset
      columnIndex = columnIndex - file.numDimensions();
      assert (columnIndex >= 0);
      measureColumnChunkReader = CarbonDataReaderFactory.getInstance()
          .getMeasureColumnChunkReader(ColumnarFormatVersion.V3, blockletInfo,
              file.getFilePath(), false);
      return measureColumnChunkReader.readRawMeasureChunk(file.getFileReader(), columnIndex);
    }
  }

  private DimensionColumnPage[] decompressDimensionPages(AbstractRawColumnChunk rawColumnChunk,
      int numPages) throws IOException, MemoryException {
    DimensionColumnPage[] pages = new DimensionColumnPage[numPages];
    for (int i = 0; i < pages.length; i++) {
      pages[i] = dimensionColumnChunkReader.decodeColumnPage(
          (DimensionRawColumnChunk) rawColumnChunk, i, null);
    }
    return pages;
  }

  private ColumnPage[] decompressMeasurePages(AbstractRawColumnChunk rawColumnChunk,
      int numPages) throws IOException, MemoryException {
    ColumnPage[] pages = new ColumnPage[numPages];
    for (int i = 0; i < pages.length; i++) {
      pages[i] = measureColumnChunkReader.decodeColumnPage(
          (MeasureRawColumnChunk) rawColumnChunk, i, null);
    }
    return pages;
  }
}
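Usage note: this class is package-private and is normally driven by CarbonCli in the same package, which parses the command line and collects the output lines. Only the -f (single file) and -c (column name) options are read directly by this class; the command selector and the data folder path come from the driver. Assuming the standard CarbonCli wiring (-cmd to pick the command, -p for the path; the jar name here is illustrative), an invocation might look like:

  java -cp carbondata-cli.jar org.apache.carbondata.tool.CarbonCli \
      -cmd benchmark -p /path/to/table -c name

which benchmarks header/footer reads for the first data file found under /path/to/table and then times the chunk IO and page decompression of column 'name' for every blocklet. Passing -f /path/to/file.carbondata restricts the benchmark to that single file instead of letting the tool pick one.

For programmatic use, a minimal sketch is shown below. The Options wiring is hypothetical (CarbonCli builds the real one), the paths are placeholders, and DefaultParser assumes commons-cli 1.3+; the class must live in org.apache.carbondata.tool because ScanBenchmark is package-private.

  package org.apache.carbondata.tool;

  import java.util.ArrayList;
  import java.util.List;

  import org.apache.commons.cli.CommandLine;
  import org.apache.commons.cli.DefaultParser;
  import org.apache.commons.cli.Options;

  public class ScanBenchmarkExample {
    public static void main(String[] args) throws Exception {
      // hypothetical option wiring; only the letters matter to ScanBenchmark
      Options options = new Options();
      options.addOption("f", true, "benchmark a single data file");
      options.addOption("c", true, "column to scan");
      CommandLine line = new DefaultParser().parse(options, new String[] { "-c", "name" });

      // ScanBenchmark appends its report lines to this list rather than printing
      List<String> output = new ArrayList<>();
      new ScanBenchmark("/path/to/table", output).run(line);
      for (String s : output) {
        System.out.println(s);
      }
    }
  }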