// Java tutorial example: ParquetAppender from the Kite SDK
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.data.filesystem;

import com.google.common.base.Objects;
import com.google.common.io.Closeables;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.kitesdk.data.spi.DynMethods;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.avro.AvroParquetWriter;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.metadata.CompressionCodecName;

/**
 * A {@code FileSystemWriter.FileAppender} that writes Avro-compatible records
 * to a Parquet file on a Hadoop {@link FileSystem}.
 *
 * <p>When compression is requested, Snappy is used only if the native Snappy
 * codec is actually loaded; otherwise the file is written uncompressed and a
 * warning is logged.
 *
 * @param <E> the record type appended to the file
 */
class ParquetAppender<E extends IndexedRecord>
    implements FileSystemWriter.FileAppender<E> {

  private static final Logger logger =
      LoggerFactory.getLogger(ParquetAppender.class);

  // Parquet row-group (block) size in bytes: 50 MB.
  private static final int DEFAULT_BLOCK_SIZE = 50 * 1024 * 1024;

  // Resolves whichever Snappy-availability check this Hadoop version exposes:
  // the no-arg isNativeCodeLoaded(), or isNativeSnappyLoaded(Configuration).
  private static final DynMethods.StaticMethod isSnappyNative =
      new DynMethods.Builder("SnappyCodec.isNativeCodeLoaded")
          .impl(SnappyCodec.class, "isNativeCodeLoaded")
          .impl(SnappyCodec.class, "isNativeSnappyLoaded", Configuration.class)
          .buildStatic();

  private final Path path;
  private final Schema schema;
  private final FileSystem fileSystem;
  private final boolean enableCompression;

  // Created by open(); remains null until then.
  private AvroParquetWriter<E> writer = null;

  /**
   * Creates an appender for {@code path} on {@code fileSystem}.
   *
   * @param fileSystem the file system to write to
   * @param path the destination Parquet file
   * @param schema the Avro schema of appended records
   * @param enableCompression whether to attempt Snappy compression
   */
  public ParquetAppender(FileSystem fileSystem, Path path, Schema schema,
                         boolean enableCompression) {
    this.fileSystem = fileSystem;
    this.path = path;
    this.schema = schema;
    this.enableCompression = enableCompression;
  }

  /**
   * Opens the underlying Parquet writer. Must be called before
   * {@link #append(IndexedRecord)}.
   *
   * @throws IOException if the writer cannot be created
   */
  @Override
  public void open() throws IOException {
    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    if (enableCompression) {
      // Snappy needs native support; fall back to uncompressed output if the
      // native library is not loaded rather than failing the write.
      boolean snappyAvailable = (Boolean) isSnappyNative.invoke(fileSystem.getConf());
      if (snappyAvailable) {
        codec = CompressionCodecName.SNAPPY;
      } else {
        logger.warn("Compression enabled, but Snappy native code not loaded. " +
            "Parquet file will not be compressed.");
      }
    }
    this.writer = new AvroParquetWriter<E>(fileSystem.makeQualified(path),
        schema, codec, DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE);
  }

  /**
   * Writes one record to the open Parquet file.
   *
   * @param entity the record to write
   * @throws IOException if the write fails
   */
  @Override
  public void append(E entity) throws IOException {
    writer.write(entity);
  }

  @Override
  public void flush() {
    // Parquet doesn't (currently) expose a flush operation
  }

  /**
   * Closes the underlying writer, propagating any {@link IOException}.
   */
  @Override
  public void close() throws IOException {
    // swallowIOException=false: close failures are surfaced to the caller.
    Closeables.close(writer, false);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
        .add("path", path)
        .add("schema", schema)
        .add("fileSystem", fileSystem)
        .add("avroParquetWriter", writer)
        .toString();
  }
}