// Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.common.table.log;

import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.storage.StorageSchemes;
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
import com.uber.hoodie.common.table.log.HoodieLogFormat.WriterBuilder;
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * HoodieLogFormatWriter can be used to append blocks to a log file. Use
 * HoodieLogFormat.WriterBuilder to construct.
 */
public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {

  private static final Logger log = LogManager.getLogger(HoodieLogFormatWriter.class);

  private HoodieLogFile logFile;
  private final FileSystem fs;
  private final long sizeThreshold;
  private final Integer bufferSize;
  private final Short replication;
  private final String logWriteToken;
  private final String rolloverLogWriteToken;
  private FSDataOutputStream output;

  // Message fragment the NameNode returns when the last block of a file cannot be appended to
  // because its replicas are unavailable (e.g. DataNodes being decommissioned). See HDFS-6325.
  private static final String APPEND_UNAVAILABLE_EXCEPTION_MESSAGE = "not sufficiently replicated yet";

  /**
   * Opens (or creates) the underlying log file for appending. If the file exists and the scheme
   * supports append, an append stream is opened; append failures are either recovered (lease
   * recovery) or handled by rolling over to a new log file. If append is unsupported, the writer
   * rolls over immediately.
   *
   * @param fs filesystem to write to
   * @param logFile target log file
   * @param bufferSize output stream buffer size
   * @param replication replication factor for newly created files
   * @param sizeThreshold size after which the writer rolls over to a new log file
   * @param logWriteToken write token used for the current log file
   * @param rolloverLogWriteToken write token used when rolling over to a new log file
   * @throws IOException on filesystem errors that cannot be handled or recovered
   * @throws InterruptedException if interrupted while recovering a file lease
   */
  HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, Short replication,
      Long sizeThreshold, String logWriteToken, String rolloverLogWriteToken)
      throws IOException, InterruptedException {
    this.fs = fs;
    this.logFile = logFile;
    this.sizeThreshold = sizeThreshold;
    this.bufferSize = bufferSize;
    this.replication = replication;
    this.logWriteToken = logWriteToken;
    this.rolloverLogWriteToken = rolloverLogWriteToken;
    Path path = logFile.getPath();
    if (fs.exists(path)) {
      boolean isAppendSupported = StorageSchemes.isAppendSupported(fs.getScheme());
      if (isAppendSupported) {
        log.info(logFile + " exists. Appending to existing file");
        try {
          this.output = fs.append(path, bufferSize);
        } catch (RemoteException e) {
          log.warn("Remote Exception, attempting to handle or recover lease", e);
          handleAppendExceptionOrRecoverLease(path, e);
        } catch (IOException ioe) {
          // Guard against IOExceptions carrying a null message (NPE fix); may still happen if
          // scheme is viewfs.
          if (ioe.getMessage() != null && ioe.getMessage().toLowerCase().contains("not supported")) {
            isAppendSupported = false;
          } else {
            throw ioe;
          }
        }
      }
      if (!isAppendSupported) {
        this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
        log.info("Append not supported.. Rolling over to " + logFile);
        createNewFile();
      }
    } else {
      log.info(logFile + " does not exist. Create a new file");
      // Block size does not matter as we will always manually autoflush
      createNewFile();
    }
  }

  public FileSystem getFs() {
    return fs;
  }

  public HoodieLogFile getLogFile() {
    return logFile;
  }

  public long getSizeThreshold() {
    return sizeThreshold;
  }

  /**
   * Appends a single log block: magic, total block length, format version, block type ordinal,
   * headers, content length, content, footers, and finally a reverse pointer (total bytes written
   * for this block, including magic) to allow backwards traversal. Each block is flushed/hsync'ed
   * to disk, and the writer rolls over to a new file once the size threshold is crossed.
   *
   * @param block the log block to append
   * @return this writer, or a new writer if a rollover happened
   */
  @Override
  public Writer appendBlock(HoodieLogBlock block) throws IOException, InterruptedException {
    // Find current version
    HoodieLogFormat.LogFormatVersion currentLogFormatVersion =
        new HoodieLogFormatVersion(HoodieLogFormat.currentVersion);
    long currentSize = this.output.size();
    // 1. Write the magic header for the start of the block
    this.output.write(HoodieLogFormat.MAGIC);
    // bytes for header
    byte[] headerBytes = HoodieLogBlock.getLogMetadataBytes(block.getLogBlockHeader());
    // content bytes
    byte[] content = block.getContentBytes();
    // bytes for footer
    byte[] footerBytes = HoodieLogBlock.getLogMetadataBytes(block.getLogBlockFooter());
    // 2. Write the total size of the block (excluding Magic)
    this.output.writeLong(getLogBlockLength(content.length, headerBytes.length, footerBytes.length));
    // 3. Write the version of this log block
    this.output.writeInt(currentLogFormatVersion.getVersion());
    // 4. Write the block type
    this.output.writeInt(block.getBlockType().ordinal());
    // 5. Write the headers for the log block
    this.output.write(headerBytes);
    // 6. Write the size of the content block
    this.output.writeLong(content.length);
    // 7. Write the contents of the data block
    this.output.write(content);
    // 8. Write the footers for the log block
    this.output.write(footerBytes);
    // 9. Write the total size of the log block (including magic) which is everything written
    // until now (for reverse pointer)
    this.output.writeLong(this.output.size() - currentSize);
    // Flush every block to disk
    flush();
    // roll over if size is past the threshold
    return rolloverIfNeeded();
  }

  /**
   * Returns the total LogBlock length which is the sum of 1. Number of bytes to write version
   * 2. Number of bytes to write ordinal 3. Length of the headers 4. Number of bytes used to write
   * content length 5. Length of the content 6. Length of the footers 7. Number of bytes to write
   * totalLogBlockLength.
   */
  private int getLogBlockLength(int contentLength, int headerLength, int footerLength) {
    return Integer.BYTES // Number of bytes to write version
        + Integer.BYTES // Number of bytes to write ordinal
        + headerLength // Length of the headers
        + Long.BYTES // Number of bytes used to write content length
        + contentLength // Length of the content
        + footerLength // Length of the footers
        + Long.BYTES; // bytes to write totalLogBlockLength at end of block (for reverse ptr)
  }

  /**
   * Closes this writer and hands out a fresh writer on the next log file version if the current
   * file has grown past the size threshold; otherwise returns this writer unchanged.
   */
  private Writer rolloverIfNeeded() throws IOException, InterruptedException {
    // Roll over if the size is past the threshold
    if (getCurrentSize() > sizeThreshold) {
      // TODO - make an end marker which seals the old log file (no more appends possible to that
      // file).
      log.info("CurrentSize " + getCurrentSize() + " has reached threshold " + sizeThreshold
          + ". Rolling over to the next version");
      HoodieLogFile newLogFile = logFile.rollOver(fs, rolloverLogWriteToken);
      // close this writer and return the new writer
      close();
      return new HoodieLogFormatWriter(fs, newLogFile, bufferSize, replication, sizeThreshold,
          logWriteToken, rolloverLogWriteToken);
    }
    return this;
  }

  private void createNewFile() throws IOException {
    this.output = fs.create(this.logFile.getPath(), false, bufferSize, replication,
        WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
  }

  /**
   * Flushes and closes the underlying stream. Idempotent: calling close() on an already-closed
   * writer is a no-op (the original implementation threw NPE on a second close).
   */
  @Override
  public void close() throws IOException {
    if (output != null) {
      flush();
      output.close();
      output = null;
    }
  }

  private void flush() throws IOException {
    if (output == null) {
      return; // Presume closed
    }
    output.flush();
    // NOTE : the following API call makes sure that the data is flushed to disk on DataNodes
    // (akin to POSIX fsync())
    // See more details here : https://issues.apache.org/jira/browse/HDFS-744
    output.hsync();
  }

  /** Current position (bytes written so far) of the open output stream. */
  public long getCurrentSize() throws IOException {
    if (output == null) {
      throw new IllegalStateException(
          "Cannot get current size as the underlying stream has been closed already");
    }
    return output.getPos();
  }

  /**
   * Handles a RemoteException raised while opening an append stream: rolls over to a new file when
   * the last block is unavailable (HDFS-6325) or another writer holds the file, attempts HDFS
   * lease recovery for in-progress recoveries, and rethrows anything else.
   */
  private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e)
      throws IOException, InterruptedException {
    if (e.getMessage().contains(APPEND_UNAVAILABLE_EXCEPTION_MESSAGE)) {
      // This issue happens when all replicas for a file are down and/or being decommissioned.
      // The fs.append() API could append to the last block for a file. If the last block is full,
      // a new block is appended to. In a scenario when a lot of DN's are decommissioned, it can
      // happen that DN's holding all replicas for a block/file are decommissioned together.
      // During this process, all these blocks will start to get replicated to other active
      // DataNodes but this process might take time (can be of the order of few hours). During this
      // time, if a fs.append() API is invoked for a file whose last block is eligible to be
      // appended to, then the NN will throw an exception saying that it couldn't find any active
      // replica with the last block. Find more information here :
      // https://issues.apache.org/jira/browse/HDFS-6325
      log.warn("Failed to open an append stream to the log file. Opening a new log file..", e);
      // Rollover the current log file (since cannot get a stream handle) and create new one
      this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
      createNewFile();
    } else if (e.getClassName().contentEquals(AlreadyBeingCreatedException.class.getName())) {
      // Fixed unbalanced parenthesis in the original log message
      log.warn("Another task executor writing to the same log file(" + logFile + "). Rolling over");
      // Rollover the current log file (since cannot get a stream handle) and create new one
      this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
      createNewFile();
    } else if (e.getClassName().contentEquals(RecoveryInProgressException.class.getName())
        && (fs instanceof DistributedFileSystem)) {
      // this happens when either another task executor writing to this file died or
      // data node is going down. Note that we can only try to recover lease for a
      // DistributedFileSystem. ViewFileSystem unfortunately does not support this operation
      log.warn("Trying to recover log on path " + path);
      if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
        log.warn("Recovered lease on path " + path);
        // try again
        this.output = fs.append(path, bufferSize);
      } else {
        log.warn("Failed to recover lease on path " + path);
        throw new HoodieException(e);
      }
    } else {
      throw new HoodieIOException("Failed to open an append stream ", e);
    }
  }
}