Java tutorial
/* * Copyright (c) 2017 VMware Inc. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.hillview.storage; import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.io.ByteOrderMark; import org.apache.commons.io.input.BOMInputStream; import org.hillview.table.api.IAppendableColumn; import org.hillview.table.api.ITable; import org.hillview.utils.HillviewLogger; import org.hillview.utils.Utilities; import javax.annotation.Nullable; import java.io.*; /** * Abstract class for a reader that reads data from a text file and keeps * track of the current position within the file. These loaders are * only allocated where the data is, they are not serializable. */ abstract class TextFileLoader { final String filename; int currentRow; int currentColumn; @Nullable IAppendableColumn[] columns; private long currentField; @Nullable private String currentToken; boolean allowFewerColumns; // Some of these may be null @Nullable private InputStream inputStream = null; @Nullable private InputStream bufferedInputStream = null; @Nullable private InputStream compressedStream = null; @Nullable private BOMInputStream bomStream = null; TextFileLoader(String path) { this.filename = path; this.currentRow = 0; this.currentColumn = 0; this.currentField = 0; this.currentToken = null; } Reader getFileReader() { try { HillviewLogger.instance.info("Reading file", "{0}", this.filename); this.inputStream = new FileInputStream(this.filename); this.bufferedInputStream = new BufferedInputStream(inputStream); // The buffered input stream is needed by the CompressorStream // to detect the compression method at runtime. InputStream fis = this.bufferedInputStream; if (Utilities.isCompressed(this.filename)) { this.compressedStream = new CompressorStreamFactory().createCompressorInputStream(fis); fis = this.compressedStream; } this.bomStream = new BOMInputStream(fis, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); ByteOrderMark bom = this.bomStream.getBOM(); String charsetName = bom == null ? "UTF-8" : bom.getCharsetName(); return new InputStreamReader(this.bomStream, charsetName); } catch (IOException | CompressorException e) { throw new RuntimeException(e); } } /** * Relinquishes all resources used. * @param reader Reader that was created by getFileReader, or null. */ void close(@Nullable Reader reader) { try { if (reader != null) reader.close(); if (this.bomStream != null) this.bomStream.close(); if (this.compressedStream != null) this.compressedStream.close(); if (this.bufferedInputStream != null) this.bufferedInputStream.close(); if (this.inputStream != null) this.inputStream.close(); } catch (IOException e) { HillviewLogger.instance.error("Error closing input streams", e); } } void append(String[] data) { try { assert this.columns != null; int columnCount = this.columns.length; this.currentColumn = 0; if (data.length > columnCount) this.error("Too many columns " + data.length + " vs " + columnCount); for (this.currentColumn = 0; this.currentColumn < data.length; this.currentColumn++) { this.currentToken = data[this.currentColumn]; this.columns[this.currentColumn].parseAndAppendString(this.currentToken); this.currentField++; if ((this.currentField % 100000) == 0) { System.out.print("."); System.out.flush(); } } if (data.length < columnCount) { if (!this.allowFewerColumns) this.error("Too few columns " + data.length + " vs " + columnCount); else { this.currentToken = ""; for (int i = data.length; i < columnCount; i++) this.columns[i].parseAndAppendString(this.currentToken); } } this.currentRow++; } catch (Exception ex) { this.error(ex); } } private String errorMessage() { String columnName = ""; if (this.columns != null) { columnName = (this.currentColumn < this.columns.length) ? (" (" + this.columns[this.currentColumn].toString() + ")") : ""; } return "Error while parsing file " + this.filename + "@" + Utilities.getHostName() + " line " + this.currentRow + (this.currentColumn >= 0 ? " column " + this.currentColumn + columnName : "") + (this.currentToken != null ? " token " + this.currentToken : ""); } void error(String message) { if (message.length() > 2048) { int lastIndex = message.length() - 48; message = message.substring(0, 2000) + "..." + message.substring(lastIndex); } throw new RuntimeException(this.errorMessage() + ": " + message); } private void error(Exception ex) { throw new RuntimeException(this.errorMessage(), ex); } public abstract ITable load(); }