// Java tutorial
package com.inmobi.databus.readers; /* * #%L * messaging-client-databus * %% * Copyright (C) 2012 - 2014 InMobi * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.Date; import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import com.inmobi.databus.files.CollectorFile; import com.inmobi.databus.files.DatabusStreamFile; import com.inmobi.databus.files.FileMap; import com.inmobi.databus.partition.PartitionId; import com.inmobi.messaging.Message; import com.inmobi.messaging.consumer.util.DatabusUtil; import com.inmobi.messaging.metrics.CollectorReaderStatsExposer; public class CollectorStreamReader extends StreamReader<CollectorFile> { private static final Log LOG = LogFactory.getLog(CollectorStreamReader.class); private long waitTimeForFlush; protected long currentOffset = 0; private boolean sameStream = false; protected FSDataInputStream inStream; protected BufferedReader reader; protected final String streamName; private boolean moveToNext = false; private CollectorReaderStatsExposer collectorMetrics; private StringBuilder builder = new StringBuilder(); private 
boolean isS3Fs = false; private boolean isLocalStreamAvailable; public CollectorStreamReader(PartitionId partitionId, FileSystem fs, String streamName, Path streamDir, long waitTimeForFlush, long waitTimeForCreate, CollectorReaderStatsExposer metrics, Configuration conf, boolean noNewFiles, Date stopTime, boolean isLocalStreamAvailable) throws IOException { super(partitionId, fs, streamDir, waitTimeForCreate, metrics, noNewFiles, stopTime); this.streamName = streamName; this.waitTimeForFlush = waitTimeForFlush; this.collectorMetrics = (CollectorReaderStatsExposer) (this.metrics); this.isLocalStreamAvailable = isLocalStreamAvailable; LOG.info("Collector reader initialized with partitionId:" + partitionId + " streamDir:" + streamDir + " waitTimeForFlush:" + waitTimeForFlush + " waitTimeForCreate:" + waitTimeForCreate); isS3Fs = isFileSystemS3(); } protected FileMap<CollectorFile> createFileMap() throws IOException { return new FileMap<CollectorFile>() { @Override protected PathFilter createPathFilter() { return new PathFilter() { @Override public boolean accept(Path p) { if (p.getName().endsWith("_current") || p.getName().endsWith("_stats")) { return false; } return true; } }; } /* * prepare a fileMap with files which are beyond the stopTime */ @Override protected void buildList() throws IOException { if (fsIsPathExists(streamDir)) { FileStatus[] fileStatuses = fsListFileStatus(streamDir, pathFilter); if (fileStatuses == null || fileStatuses.length == 0) { LOG.info("No files in directory:" + streamDir); return; } if (stopTime == null) { for (FileStatus file : fileStatuses) { addPath(file); } } else { for (FileStatus file : fileStatuses) { Date currentTimeStamp = getDateFromCollectorFile(file.getPath().getName()); if (stopTime.before(currentTimeStamp)) { stopListing(); continue; } addPath(file); } } } else { LOG.info("Collector directory does not exist"); } } @Override protected TreeMap<CollectorFile, FileStatus> createFilesMap() { return new TreeMap<CollectorFile, 
FileStatus>(); } @Override protected CollectorFile getStreamFile(String fileName) { return CollectorFile.create(fileName); } @Override protected CollectorFile getStreamFile(FileStatus file) { return CollectorFile.create(file.getPath().getName()); } }; } protected void initCurrentFile() { super.initCurrentFile(); sameStream = false; } protected boolean openCurrentFile(boolean next) throws IOException { closeCurrentFile(); if (getCurrentFile() == null) { return false; } if (next) { resetCurrentFileSettings(); } LOG.info("Opening file:" + getCurrentFile() + " NumLinesTobeSkipped when" + " opening:" + currentLineNum); if (fsIsPathExists(getCurrentFile())) { inStream = fsOpen(getCurrentFile()); reader = new BufferedReader(new InputStreamReader(inStream)); skipOldData(); } else { LOG.info("CurrentFile:" + getCurrentFile() + " does not exist"); } return true; } protected synchronized void closeCurrentFile() throws IOException { if (reader != null) { reader.close(); reader = null; } if (inStream != null) { inStream.close(); inStream = null; } } protected Message readRawLine() throws IOException { int next = reader.read(); while ((char) next != '\n') { if (next == -1) { LOG.info("reading EOF before a line feed "); return null; } builder.append((char) next); next = reader.read(); } String line = builder.toString(); builder.setLength(0); if (line != null) { return DatabusUtil.decodeMessage(line.getBytes()); } else { return null; } } @Override protected Date getTimeStampFromCollectorStreamFile(FileStatus file) { try { return CollectorStreamReader.getDateFromCollectorFile(getCurrentFile().getName()); } catch (IOException exception) { LOG.info("Not able to get timestamp from " + getCurrentFile() + " file " + exception); } return null; } protected Message readNextLine() throws IOException { Message line = null; if (inStream != null) { line = super.readNextLine(); currentOffset = inStream.getPos(); } return line; } protected void resetCurrentFileSettings() { 
super.resetCurrentFileSettings(); currentOffset = 0; moveToNext = false; if (builder.length() != 0) { LOG.warn("Discarding partial message " + builder.toString()); builder.setLength(0); } } protected void skipOldData() throws IOException { if (sameStream && !isS3Fs) { LOG.info("Seeking to offset:" + currentOffset); inStream.seek(currentOffset); } else { skipLines(currentLineNum); sameStream = true; currentOffset = inStream.getPos(); } } public Message readLine() throws IOException, InterruptedException { if (closed) { LOG.info("Stream closed"); return null; } Message line = readNextLine(); while (line == null) { // reached end of file? LOG.info("Read " + getCurrentFile() + " with lines:" + currentLineNum); if (closed) { LOG.info("Stream closed"); break; } Path lastFile = getLastFile(); // rebuild file list only if local stream is available because some files // may move to local stream if (isLocalStreamAvailable || !hasNextFile()) { build(); // rebuild file list } if (!hasNextFile()) { //there is no next files // stop reading if it read till stopTime if (hasReadFully()) { LOG.info("read all files till stop date"); break; } if (!setIterator()) { LOG.info("Could not find current file in the stream"); if (isWithinStream(getCurrentFile().getName()) || !isLocalStreamAvailable) { LOG.info("Staying in collector stream as earlier files still exist"); startFromNextHigherAndOpen(getCurrentFile().getName()); updateLatestMinuteAlreadyReadForCollectorReader(); LOG.info("Reading from the next higher file"); } else { LOG.info("Current file would have been moved to Local Stream"); return null; } } else { waitForFlushAndReOpen(); LOG.info("Reading from the same file after reopen"); } } else { // reopen a file only if the file is last file on the stream // and local stream is not available if (moveToNext || (lastFile != null && !(lastFile.equals(getCurrentFile())))) { setNextFile(); updateLatestMinuteAlreadyReadForCollectorReader(); LOG.info("Reading from next file: " + 
getCurrentFile()); } else { LOG.info("Reading from same file before moving to next"); // open the same file reOpen(); moveToNext = true; } } line = readNextLine(); } return line; } private void reOpen() throws IOException { openCurrentFile(false); } private void waitForFlushAndReOpen() throws IOException, InterruptedException { if (!closed) { LOG.info("Waiting for flush"); Thread.sleep(waitTimeForFlush); collectorMetrics.incrementWaitTimeUnitsInSameFile(); reOpen(); } } private void startFromNextHigherAndOpen(String fileName) throws IOException, InterruptedException { boolean ret = startFromNextHigher(fileName); if (ret) { openCurrentFile(true); } } public boolean startFromNextHigher(String fileName) throws IOException, InterruptedException { if (!setNextHigher(fileName)) { waitForNextFileCreation(fileName); } return true; } private void waitForNextFileCreation(String fileName) throws IOException, InterruptedException { while (!closed && !setNextHigher(fileName) && !hasReadFully()) { waitForFileCreate(); build(); } } @Override protected CollectorFile getStreamFile(Date timestamp) { return getCollectorFile(streamName, timestamp); } protected CollectorFile getStreamFile(FileStatus status) { return getCollectorFile(status.getPath().getName()); } public static boolean isCollectorFile(String fileName) { try { getCollectorFile(fileName); } catch (IllegalArgumentException ie) { return false; } return true; } public static String getCollectorFileName(String streamName, String localStreamfile) { return DatabusStreamFile.create(streamName, localStreamfile).getCollectorFile().toString(); } public static Date getDateFromCollectorFile(String fileName) throws IOException { return getCollectorFile(fileName).getTimestamp(); } public static String getCollectorFileName(String streamName, Date date) { return getCollectorFile(streamName, date).toString(); } public static CollectorFile getCollectorFile(String streamName, Date date) { return new CollectorFile(streamName, date, 0); } 
public static CollectorFile getCollectorFile(String fileName) { return CollectorFile.create(fileName); } @Override protected boolean hasReadFully() { if (currentFile != null && !setIterator()) { return false; } return super.hasReadFully(); } }