Java tutorial: appending record blocks to an Avro log file with AvroLogAppender
/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.common.table.log.avro;

import com.uber.hoodie.common.table.log.HoodieLogAppendConfig;
import com.uber.hoodie.common.table.log.HoodieLogAppender;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.AvroFSInput;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.util.Iterator;

/**
 * AvroLogAppender appends a batch of IndexedRecords to an Avro data file.
 * If auto-flush is set, every call to append() writes out a block.
 * An Avro block corresponds to the records appended in a single commit.
 *
 * @see org.apache.avro.file.DataFileReader
 */
public class AvroLogAppender implements HoodieLogAppender<IndexedRecord> {

  private static final Logger log = LogManager.getLogger(AvroLogAppender.class);
  private final HoodieLogAppendConfig config;
  private FSDataOutputStream output;
  private DataFileWriter<IndexedRecord> writer;
  private boolean autoFlush;

  public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException {
    FileSystem fs = config.getFs();
    this.config = config;
    this.autoFlush = config.isAutoFlush();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema());
    this.writer = new DataFileWriter<>(datumWriter);
    Path path = config.getLogFile().getPath();

    if (fs.exists(path)) {
      // TODO - check for log corruption and roll over if needed
      log.info(config.getLogFile() + " exists. Appending to existing file");
      // this log path exists, we will append to it
      fs = FileSystem.get(fs.getConf());
      try {
        this.output = fs.append(path, config.getBufferSize());
      } catch (RemoteException e) {
        // this happens when another task executor writing to this file died,
        // or a data node is going down
        if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName())
            && fs instanceof DistributedFileSystem) {
          log.warn("Trying to recover log on path " + path);
          if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
            log.warn("Recovered lease on path " + path);
            // try again
            this.output = fs.append(path, config.getBufferSize());
          } else {
            log.warn("Failed to recover lease on path " + path);
            throw new HoodieException(e);
          }
        } else {
          // not a recoverable lease conflict; surface the original failure
          // instead of continuing with a null output stream
          throw new HoodieIOException("Failed to open " + path + " for append", e);
        }
      }
      this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output);
      // we always want to flush to disk every time an Avro block is written
      this.writer.setFlushOnEveryBlock(true);
    } else {
      log.info(config.getLogFile() + " does not exist. Create a new file");
      this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(),
          config.getBlockSize(), null);
      this.writer.create(config.getSchema(), output);
      this.writer.setFlushOnEveryBlock(true);
      // We need to close the writer to be able to tell the name node that we created this file
      // this.writer.close();
    }
  }

  public void append(Iterator<IndexedRecord> records) throws IOException {
    records.forEachRemaining(r -> {
      try {
        writer.append(r);
      } catch (IOException e) {
        throw new HoodieIOException(
            "Could not append record " + r + " to " + config.getLogFile(), e);
      }
    });
    if (autoFlush) {
      sync();
    }
  }

  public void sync() throws IOException {
    if (output == null || writer == null) {
      // presume the appender has already been closed
      return;
    }
    writer.flush();
    output.flush();
    output.hflush();
  }

  public void close() throws IOException {
    sync();
    writer.close();
    writer = null;
    output.close();
    output = null;
  }

  public long getCurrentSize() throws IOException {
    if (writer == null) {
      throw new IllegalStateException(
          "LogWriter " + config.getLogFile() + " has been closed. Cannot getCurrentSize");
    }
    // writer.sync() returns only the offset within this block, not the global offset
    return output.getPos();
  }
}
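
To show how this appender is meant to be driven, here is a minimal usage sketch. It assumes a fully built HoodieLogAppendConfig is supplied by the caller, and only relies on the behavior visible in AvroLogAppender above (one append() call producing one Avro block, sync() forcing it to disk). The wrapper class and its writeBatch method are illustrative, not part of the library.

import com.uber.hoodie.common.table.log.HoodieLogAppendConfig;

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class AvroLogAppenderExample {

  // Appends one commit's worth of records as a single Avro block, then closes
  // the appender. The config is assumed to be constructed elsewhere.
  public static void writeBatch(HoodieLogAppendConfig config, List<GenericRecord> records)
      throws IOException, InterruptedException {
    AvroLogAppender appender = new AvroLogAppender(config);
    try {
      // GenericRecord extends IndexedRecord, so the copy just widens the element type
      List<IndexedRecord> batch = new ArrayList<>(records);
      appender.append(batch.iterator());
      // force the block to disk in case the config has auto-flush disabled
      appender.sync();
      System.out.println("log file size after append: " + appender.getCurrentSize());
    } finally {
      appender.close();
    }
  }
}

Note that close() itself calls sync(), so the explicit call matters mainly when an appender is kept open across several batches with auto-flush disabled.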
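
The class javadoc cross-references org.apache.avro.file.DataFileReader as the read-side counterpart. Below is a sketch of reading the log back with standard Avro and Hadoop APIs; the path is assumed to point at a file written by the appender above.

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.AvroFSInput;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class AvroLogReadExample {

  // Iterates over every record in the log file. DataFileReader is block-aware,
  // so records come back in the same blocks that append()/sync() wrote them in.
  public static void dump(Configuration conf, Path path) throws IOException {
    AvroFSInput input = new AvroFSInput(FileContext.getFileContext(conf), path);
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(input, new GenericDatumReader<GenericRecord>())) {
      for (GenericRecord record : reader) {
        System.out.println(record);
      }
    }
  }
}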