Java tutorial: writing formatted events to HDFS with Flume's CustomDfsSink
/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.handlers.hdfs;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Compressor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeBuilder;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.handlers.text.FormatFactory;
import com.cloudera.flume.handlers.text.output.OutputFormat;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.PathManager;
import com.google.common.base.Preconditions;

/**
 * This creates a raw Hadoop DFS file that outputs data formatted by the
 * provided OutputFormat. It is assumed that the output is a file of some sort.
 */
public class CustomDfsSink extends EventSink.Base {
  static final Logger LOG = LoggerFactory.getLogger(CustomDfsSink.class);

  private static final String A_OUTPUTFORMAT = "recordformat";

  boolean compressOutput;
  OutputFormat format;
  OutputStream writer;
  AtomicLong count = new AtomicLong();
  String path;
  Path dstPath;
  PathManager pathManager;

  public CustomDfsSink(String path, OutputFormat format) {
    Preconditions.checkArgument(path != null);
    Preconditions.checkArgument(format != null);
    this.path = path;
    this.format = format;
    this.writer = null;
  }

  @Override
  public void append(Event e) throws IOException, InterruptedException {
    if (writer == null) {
      throw new IOException("Append failed, did you open the writer?");
    }
    format.format(writer, e);
    count.getAndIncrement();
    super.append(e);
  }

  @Override
  public void close() throws IOException {
    LOG.info("Closing HDFS file: " + pathManager.getOpenPath());
    writer.flush();
    LOG.info("done writing raw file to hdfs");
    writer.close();
    pathManager.close();
    writer = null;
  }
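  // Usage sketch (hypothetical caller, not part of the original class): the
  // writer only exists between open() and close(), so the expected call order
  // is open(), then append() zero or more times, then close(); append()
  // without a prior open() throws the IOException above. Here fmt and e stand
  // in for an OutputFormat and an Event.
  //
  //   CustomDfsSink sink = new CustomDfsSink("hdfs://namenode/logs/data", fmt);
  //   sink.open();
  //   sink.append(e);
  //   sink.close();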
  /**
   * Hadoop compression codecs that use native libs require an instance of a
   * Configuration object. They require this because of a check to determine
   * whether or not the native libs have been loaded. GzipCodec, LzoCodec, and
   * LzopCodec are all codecs that require native libs. GzipCodec is a slight
   * exception: if the native libs are not accessible, it falls back to pure
   * Java, which results in no errors, just notices. BZip2Codec is an example
   * of a codec that doesn't use native libs.
   */
  @Override
  public void open() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();
    FileSystem hdfs;
    String codecName = conf.getCollectorDfsCompressCodec();
    CompressionCodec codec = getCodec(conf, codecName);

    if (codec == null) {
      dstPath = new Path(path);
      hdfs = dstPath.getFileSystem(conf);
      pathManager = new PathManager(hdfs, dstPath.getParent(),
          dstPath.getName());
      writer = pathManager.open();
      LOG.info("Creating HDFS file: " + pathManager.getOpenPath());
      return;
    }

    Compressor cmp = codec.createCompressor();
    dstPath = new Path(path + codec.getDefaultExtension());
    hdfs = dstPath.getFileSystem(conf);
    pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
    writer = pathManager.open();
    try {
      writer = codec.createOutputStream(writer, cmp);
    } catch (NullPointerException npe) {
      // createOutputStream tries to find the "native" version of the codec;
      // if that fails, it tries to find the Java version. If there is no Java
      // version, it exits via NPE. We capture this and convert it into an
      // IOException with a more useful error message.
      LOG.error("Unable to load compression codec " + codec);
      throw new IOException("Unable to load compression codec " + codec);
    }
    LOG.info("Creating " + codec + " compressed HDFS file: "
        + pathManager.getOpenPath());
  }

  private static boolean codecMatches(Class<? extends CompressionCodec> cls,
      String codecName) {
    String simpleName = cls.getSimpleName();
    if (cls.getName().equals(codecName)
        || simpleName.equalsIgnoreCase(codecName)) {
      return true;
    }
    if (simpleName.endsWith("Codec")) {
      String prefix = simpleName.substring(0,
          simpleName.length() - "Codec".length());
      if (prefix.equalsIgnoreCase(codecName)) {
        return true;
      }
    }
    return false;
  }

  public static CompressionCodec getCodec(Configuration conf, String codecName) {
    List<Class<? extends CompressionCodec>> codecs = CompressionCodecFactory
        .getCodecClasses(FlumeConfiguration.get());
    // Wish we could base this on DefaultCodec, but it appears not all codecs
    // extend DefaultCodec (Lzo).
    CompressionCodec codec = null;
    ArrayList<String> codecStrs = new ArrayList<String>();
    codecStrs.add("None");
    for (Class<? extends CompressionCodec> cls : codecs) {
      codecStrs.add(cls.getSimpleName());
      if (codecMatches(cls, codecName)) {
        try {
          codec = cls.newInstance();
        } catch (InstantiationException e) {
          LOG.error("Unable to instantiate " + cls + " class");
        } catch (IllegalAccessException e) {
          LOG.error("Unable to access " + cls + " class");
        }
      }
    }
    if (codec == null) {
      if (!codecName.equalsIgnoreCase("None")) {
        throw new IllegalArgumentException("Unsupported compression codec "
            + codecName + ". Please choose from: " + codecStrs);
      }
    } else if (codec instanceof Configurable) {
      // Must check instanceof, as BZip2Codec doesn't implement Configurable.
      // Must set the configuration for Configurable objects that may or do
      // use native libs.
      ((Configurable) codec).setConf(conf);
    }
    return codec;
  }
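  // Illustrative resolution (assuming the stock Hadoop codecs are on the
  // classpath): getCodec(conf, "gzip"), getCodec(conf, "GzipCodec"), and
  // getCodec(conf, "org.apache.hadoop.io.compress.GzipCodec") all resolve to
  // a GzipCodec instance via codecMatches(); getCodec(conf, "None") returns
  // null, and an unrecognized name throws IllegalArgumentException.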
  public static SinkBuilder builder() {
    return new SinkBuilder() {
      @Override
      public EventSink create(Context context, Object... args) {
        if (args.length != 2 && args.length != 1) {
          // TODO (jon) make this message easier.
          throw new IllegalArgumentException(
              "usage: customdfs(\"[(hdfs|file|s3n|...)://namenode[:port]]/path\", format)");
        }

        Object format = (args.length == 1) ? null : args[1];
        OutputFormat fmt;
        try {
          fmt = FlumeBuilder.createFormat(FormatFactory.get(), format);
        } catch (FlumeSpecException e) {
          LOG.error("failed to load format " + format, e);
          throw new IllegalArgumentException("failed to load format " + format);
        }
        return new CustomDfsSink(args[0].toString(), fmt);
      }

      @Deprecated
      @Override
      public EventSink build(Context context, String... args) {
        // The updated interface calls create(Context, Object...) instead.
        throw new RuntimeException(
            "Old sink builder for CustomDfsSink should not be exercised");
      }
    };
  }

  @Override
  public String getName() {
    return "CustomDfs";
  }

  @Override
  public ReportEvent getMetrics() {
    ReportEvent rpt = super.getMetrics();
    rpt.setStringMetric(A_OUTPUTFORMAT, format.getBuilder().getName());
    rpt.setLongMetric(ReportEvent.A_COUNT, count.get());
    return rpt;
  }

  @Deprecated
  @Override
  public ReportEvent getReport() {
    ReportEvent rpt = super.getReport();
    rpt.setStringMetric(A_OUTPUTFORMAT, format.getBuilder().getName());
    rpt.setLongMetric(ReportEvent.A_COUNT, count.get());
    return rpt;
  }
}
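To tie the pieces together, here is a minimal driver class. This is a sketch, not part of the original source: it assumes Flume OG's EventImpl class and a no-arg Context constructor are available, and the HDFS URI and the "raw" output format name are illustrative values.

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.handlers.hdfs.CustomDfsSink;

public class CustomDfsSinkExample {
  public static void main(String[] args) throws Exception {
    // Build the sink the way the Flume spec language would with
    // customdfs("hdfs://namenode:8020/flume/demo", "raw"). The path and
    // format name here are assumptions for illustration only.
    EventSink sink = CustomDfsSink.builder().create(new Context(),
        "hdfs://namenode:8020/flume/demo", "raw");
    sink.open();                                     // creates the HDFS file
    Event e = new EventImpl("hello hdfs".getBytes());
    sink.append(e);                                  // formats and counts the event
    sink.close();                                    // flushes and closes the file
  }
}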