Java tutorial
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.data.filesystem;

import com.cloudera.cdk.data.spi.ReaderWriterState;
import com.cloudera.cdk.data.DatasetWriter;
import com.cloudera.cdk.data.DatasetWriterException;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.io.Closeables;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * A {@link DatasetWriter} that appends entities to a single Avro data file on
 * a Hadoop {@link FileSystem}.
 *
 * <p>Data is first written to a temporary sibling file named
 * {@code .<name>.tmp} in the same directory as the target path. On
 * {@link #close()} the temporary file is renamed to the final path.
 *
 * <p>The writer moves through the states {@code NEW} (after construction),
 * {@code OPEN} (after a successful {@link #open()}) and {@code CLOSED} (after
 * a successful {@link #close()}).
 *
 * @param <E> the type of entity written by this writer
 */
class FileSystemDatasetWriter<E> implements DatasetWriter<E> {

  private static final Logger logger = LoggerFactory
      .getLogger(FileSystemDatasetWriter.class);

  /** Final destination of the data file. */
  private final Path path;
  /** Avro schema handed to the data file writer when the file is created. */
  private final Schema schema;
  private final FileSystem fileSystem;
  /** When true, the snappy codec is set on the data file writer. */
  private final boolean enableCompression;
  /** Temporary file that receives the data until {@link #close()} commits it. */
  private final Path pathTmp;

  private FSDataOutputStream out;
  private DataFileWriter<E> dataFileWriter;
  private DatumWriter<E> writer;
  private ReaderWriterState state;

  /**
   * Creates a writer in the {@code NEW} state. No file system access happens
   * until {@link #open()} is called.
   *
   * @param fileSystem        the file system on which the file is created
   * @param path              the final path of the data file
   * @param schema            the Avro schema used to create the data file
   * @param enableCompression whether to compress the file with snappy
   */
  public FileSystemDatasetWriter(FileSystem fileSystem, Path path,
      Schema schema, boolean enableCompression) {

    this.fileSystem = fileSystem;
    this.path = path;
    this.pathTmp = new Path(path.getParent(), "." + path.getName() + ".tmp");
    this.schema = schema;
    this.enableCompression = enableCompression;
    this.state = ReaderWriterState.NEW;
  }

  /**
   * Creates the temporary file and prepares the Avro writer, moving this
   * writer to the {@code OPEN} state.
   *
   * @throws IllegalStateException  if the writer is not in the {@code NEW} state
   * @throws DatasetWriterException if the temporary file cannot be created
   */
  @Override
  public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
        "Unable to open a writer from state:%s", state);

    logger.debug(
        "Opening data file with pathTmp:{} (final path will be path:{})",
        pathTmp, path);

    writer = new ReflectDatumWriter<E>();
    dataFileWriter = new DataFileWriter<E>(writer);

    /*
     * We may want to expose the codec in the writer and simply rely on the
     * builder and proper instantiation from dataset-level configuration.
     * Hard-coding snappy seems a little too draconian.
     */
    if (enableCompression) {
      dataFileWriter.setCodec(CodecFactory.snappyCodec());
    }

    FSDataOutputStream stream = null;

    try {
      stream = fileSystem.create(pathTmp, true);
      dataFileWriter.create(schema, stream);
    } catch (IOException e) {
      // Do not leak the stream if the file was created but the Avro header
      // could not be written. The original failure is the one reported.
      try {
        Closeables.close(stream, true);
      } catch (IOException ignored) {
        // Not expected: IOExceptions are swallowed when the flag is true.
      }

      throw new DatasetWriterException(
          "Unable to create writer to path:" + pathTmp, e);
    }

    out = stream;
    state = ReaderWriterState.OPEN;
  }

  /**
   * Appends a single entity to the data file.
   *
   * @param entity the entity to append
   * @throws IllegalStateException  if the writer is not in the {@code OPEN} state
   * @throws DatasetWriterException if the underlying writer fails to append
   */
  @Override
  public void write(E entity) {
    Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
        "Attempt to write to a writer in state:%s", state);

    try {
      dataFileWriter.append(entity);
    } catch (IOException e) {
      throw new DatasetWriterException(
          "Unable to write entity:" + entity + " with writer:" + dataFileWriter,
          e);
    }
  }

  /**
   * Flushes the Avro writer and then calls {@code hflush()} on the underlying
   * output stream.
   *
   * @throws IllegalStateException  if the writer is not in the {@code OPEN} state
   * @throws DatasetWriterException if flushing fails; the triggering
   *                                {@link IOException} is attached as the cause
   */
  @Override
  public void flush() {
    Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
        "Attempt to flush a writer in state:%s", state);

    try {
      dataFileWriter.flush();
      out.hflush();
    } catch (IOException e) {
      throw new DatasetWriterException(
          "Unable to flush file writer:" + dataFileWriter, e);
    }
  }

  /**
   * Closes the Avro writer and commits the temporary file by renaming it to
   * the final path. Does nothing unless the writer is in the {@code OPEN}
   * state.
   *
   * <p>The state only changes to {@code CLOSED} after both the close and the
   * rename have succeeded; if either fails the writer remains {@code OPEN}.
   *
   * @throws DatasetWriterException if the writer cannot be closed or the
   *                                temporary file cannot be renamed
   */
  @Override
  public void close() {
    if (state.equals(ReaderWriterState.OPEN)) {
      logger.debug("Closing pathTmp:{}", pathTmp);

      try {
        Closeables.close(dataFileWriter, false);
      } catch (IOException e) {
        throw new DatasetWriterException(
            "Unable to close writer:" + dataFileWriter + " to path:" + pathTmp,
            e);
      }

      logger.debug("Committing pathTmp:{} to path:{}", pathTmp, path);

      try {
        if (!fileSystem.rename(pathTmp, path)) {
          throw new DatasetWriterException(
              "Failed to move " + pathTmp + " to " + path);
        }
      } catch (IOException e) {
        throw new DatasetWriterException(
            "Internal error while trying to commit path:" + pathTmp, e);
      }

      state = ReaderWriterState.CLOSED;
    }
  }

  /**
   * @return true if the writer is currently in the {@code OPEN} state
   */
  @Override
  public boolean isOpen() {
    return state.equals(ReaderWriterState.OPEN);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
        .add("path", path)
        .add("schema", schema)
        .add("fileSystem", fileSystem)
        .add("enableCompression", enableCompression)
        .add("pathTmp", pathTmp)
        .add("dataFileWriter", dataFileWriter)
        .add("writer", writer)
        .add("state", state)
        .toString();
  }

  /**
   * Fluent builder for {@link FileSystemDatasetWriter}. Compression is
   * enabled by default; file system, path and schema must all be set before
   * calling {@link #build()}.
   */
  public static class Builder {

    private FileSystem fileSystem;
    private Path path;
    private Schema schema;
    private boolean enableCompression;

    /** Creates a builder with compression enabled. */
    public Builder() {
      enableCompression = true;
    }

    /**
     * @param fileSystem the file system on which the file is created
     * @return this builder
     */
    public Builder fileSystem(FileSystem fileSystem) {
      this.fileSystem = fileSystem;
      return this;
    }

    /**
     * @param path the final path of the data file
     * @return this builder
     */
    public Builder path(Path path) {
      this.path = path;
      return this;
    }

    /**
     * @param schema the Avro schema used to create the data file
     * @return this builder
     */
    public Builder schema(Schema schema) {
      this.schema = schema;
      return this;
    }

    /**
     * @param enableCompression whether the writer should use compression
     * @return this builder
     */
    public Builder enableCompression(boolean enableCompression) {
      this.enableCompression = enableCompression;
      return this;
    }

    /**
     * Builds a new writer from the configured values.
     *
     * @param <E> the type of entity the writer accepts
     * @return a new writer in the {@code NEW} state
     * @throws IllegalStateException if the file system, path or schema has
     *                               not been set
     */
    public <E> FileSystemDatasetWriter<E> build() {
      Preconditions
          .checkState(fileSystem != null, "File system is not defined");
      Preconditions.checkState(path != null, "Path is not defined");
      Preconditions.checkState(schema != null, "Schema is not defined");

      return new FileSystemDatasetWriter<E>(fileSystem, path, schema,
          enableCompression);
    }
  }
}