Java tutorial
/** * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.cdk.data.filesystem; import com.cloudera.cdk.data.spi.ReaderWriterState; import com.cloudera.cdk.data.DatasetReaderException; import com.cloudera.cdk.data.spi.AbstractDatasetReader; import com.google.common.base.Objects; import com.google.common.base.Preconditions; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.reflect.ReflectDatumReader; import org.apache.hadoop.fs.AvroFSInput; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; class FileSystemDatasetReader<E> extends AbstractDatasetReader<E> { private final FileSystem fileSystem; private final Path path; private final Schema schema; private ReaderWriterState state; private DataFileReader<E> reader; private static final Logger logger = LoggerFactory.getLogger(FileSystemDatasetReader.class); public FileSystemDatasetReader(FileSystem fileSystem, Path path, Schema schema) { Preconditions.checkArgument(fileSystem != null, "FileSystem cannot be null"); Preconditions.checkArgument(path != null, "Path cannot be null"); Preconditions.checkArgument(schema != null, "Schema cannot be null"); this.fileSystem = fileSystem; this.path = path; this.schema = schema; this.state = ReaderWriterState.NEW; } @Override public void open() { Preconditions.checkState(state.equals(ReaderWriterState.NEW), "A reader may not be opened more than once - current state:%s", state); logger.debug("Opening reader on path:{}", path); try { reader = new DataFileReader<E>( new AvroFSInput(fileSystem.open(path), fileSystem.getFileStatus(path).getLen()), new ReflectDatumReader<E>(schema)); } catch (IOException e) { throw new DatasetReaderException("Unable to create reader path:" + path, e); } state = ReaderWriterState.OPEN; } @Override public boolean hasNext() { Preconditions.checkState(state.equals(ReaderWriterState.OPEN), "Attempt to read from a file in state:%s", state); return reader.hasNext(); } @Override public E next() { Preconditions.checkState(state.equals(ReaderWriterState.OPEN), "Attempt to read from a file in state:%s", state); return reader.next(); } @Override public void close() { if (!state.equals(ReaderWriterState.OPEN)) { return; } logger.debug("Closing reader on path:{}", path); try { reader.close(); } catch (IOException e) { throw new DatasetReaderException("Unable to close reader path:" + path, e); } state = ReaderWriterState.CLOSED; } @Override public boolean isOpen() { return state.equals(ReaderWriterState.OPEN); } @Override public String toString() { return Objects.toStringHelper(this).add("fileSystem", fileSystem).add("path", path).add("schema", schema) .add("state", state).add("reader", reader).toString(); } }