// Java tutorial
/* * Copyright 2006, United States Geological Survey or * third-party contributors as indicated by the @author tags. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package gov.usgs.anss.query; import gov.usgs.anss.edge.*; import gov.usgs.anss.query.EdgeQueryOptions.OutputType; import gov.usgs.anss.query.cwb.holdings.CWBHoldingsServerImpl; import gov.usgs.anss.query.outputter.Filename; import gov.usgs.anss.seed.MiniSeed; import java.io.*; import java.net.*; import java.util.ArrayList; import java.util.GregorianCalendar; import java.util.Collections; import java.text.DecimalFormat; import java.util.Comparator; import java.util.TimeZone; import java.util.logging.Level; import java.util.logging.LogManager; import java.util.logging.Logger; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; /** This class is the main class for CWBQuery which allows the user to make queries * against all files on a CWB or Edge computer. The program has two modes : * command line, and command file. * *In command line mode the user can specify all of the options and get data from *a single seedname mask. 
 *
 * In command file mode some of the arguments come from the command line
 * while the seedname mask, start time and duration come from the command file.
 *
 * The command line arguments are:
 * -s seedmask   Set the seedmask as a regular expression (.=anychar, [] set of matches).
 *               The 12 character seedname is NNSSSSSCCCLL N=network, S=station code,
 *               C=channel code, L=location code.
 *               The regular expression can be useful in that IMPD03.[BS]H... would return
 *               all components with network IM, the first 4 chars of station code are PD03,
 *               for all BH or SH components and any location code.
 * -b yyyy/mm/dd hh:mm:ss  The time to start the query.
 * -d secs       The number of seconds of data (the duration).
 *
 * -t type       Legal types are "sac", "ms", "msz", "dcc512", and "dcc" where
 *               sac = sac binary format. Data will be zero filled and start at the next sample
 *                     at or after the -b time with exactly the duration.
 *               ms = mini-seed raw format. The mini-seed blocks are returned in sorted order
 *                     but they might overlap or be duplicated.
 *               msz = mini-seed but zero-filled and recompressed. No blk 1000, 1001 are
 *                     preserved. Data will start at the sample following the -b time and there
 *                     will be a full duration of the data. If -msb is specified, use that
 *                     block length.
 *               dcc = blocks processed to 4096 to best effort Mini-seed eliminating overlaps, etc.
 *               dcc512 = blocks processed to 512 best effort Mini-seed eliminating overlaps, etc.
 * -f filename   Use the file command mode. The list of -s -b -d are in the file, one per line.
 * -h host       The host of the server computer as a name or dotted IP address.
 * -p port       The port on which the service is running.
 * -msb blocksize  Set the blocksize for msz output.
 * -dbg          Turn on the debug flag.
 *
 * @author davidketchum
 * TODO: consider prepending (e.g.)
 * {@code if (logger.getLevel().intValue() <= Level.FINEST.intValue())}
 * to low level logger statements with concatenated toString parameter(s).
*/ public class EdgeQueryClient { static DecimalFormat df2; static DecimalFormat df4; static DecimalFormat df6; private static final Logger logger = Logger.getLogger(EdgeQueryClient.class.getName()); static { logger.fine("$Id$"); } private static DateTimeFormatter hmsFormat = ISODateTimeFormat.time().withZone(DateTimeZone.forID("UTC")); /** Creates a new instance of EdgeQueryClient */ public EdgeQueryClient() { } /** do a query. The command line arguments are passed in as they are for the query tool * a files is created unless -t null is specified. In that case the return is an ArrayList * containing ArrayLists<MiniSeed> for each channel returned *@param args The String array with args per the documentation *@return The ArrayList with ArrayLists of miniseed one for each channel returned. */ public static ArrayList<ArrayList<MiniSeed>> query(EdgeQueryOptions options) { String line = ""; long msSetup = 0; long msConnect = 0; long msTransfer = 0; long msOutput = 0; long msCommand = 0; long startTime = System.currentTimeMillis(); long startPhase = startTime; byte[] b = new byte[4096]; Outputer out = null; if (df6 == null) { df6 = new DecimalFormat("000000"); } GregorianCalendar jan_01_2007 = new GregorianCalendar(2007, 0, 1); ArrayList<ArrayList<MiniSeed>> blksAll = null; String filename = ""; BufferedReader infile = null; // TODO: Push this into EdgeQueryOptions in favour of a command line iterator. 
try { infile = new BufferedReader(options.getAsReader()); } catch (FileNotFoundException ex) { logger.severe("did not find the input file=" + options.filenamein); } // the "in" BufferedReader will give us the command lines we need for the other end try { // for each line of input, read it, reformat it with single quotes, send to server int nline = 0; int totblks = 0; // particularly for the DCC we want this program to not error out if we cannot connect to the server // So make sure we can connect and print messages Socket ds = null; while (ds == null) { try { ds = new Socket(options.host, options.port); } catch (IOException e) { ds = null; if (e != null) { if (e.getMessage() != null) { if (e.getMessage().indexOf("Connection refused") >= 0) { logger.warning("Got a connection refused. " + options.host + "/" + options.port + " Is the server up? Wait 20 and try again"); } } else { logger.warning("Got IOError opening socket to server e=" + e); } } else { logger.warning("Got IOError opening socket to server e=" + e); } try { Thread.sleep(20000); } catch (InterruptedException ex) { // This isn't necessarily a major issue, and for the purposes // of sleep, we really don't care. 
logger.log(Level.FINE, "sleep interrupted.", ex); } } } InputStream in = ds.getInputStream(); // Get input and output streams OutputStream outtcp = ds.getOutputStream(); msConnect += (System.currentTimeMillis() - startPhase); startPhase = System.currentTimeMillis(); while ((line = infile.readLine()) != null) { if (line.length() < 2) { continue; } nline++; options = new EdgeQueryOptions(line); if (!options.isValid()) { logger.severe("Error @line " + nline); return null; } out = options.getOutputter(); if (out == null) { blksAll = new ArrayList<ArrayList<MiniSeed>>(20); } else if (out instanceof SacOutputer) { ((SacOutputer) out).doQuery(); continue; } // The length at which our compare for changes depends on the output file mask Comparator nsclComparator = options.getNsclComparator(); long maxTime = 0; int ndups = 0; line = options.getSingleQuotedCommand(); try { msSetup += (System.currentTimeMillis() - startPhase); startPhase = System.currentTimeMillis(); boolean perfStart = true; outtcp.write(line.getBytes()); int iblk = 0; NSCL nscl = null; boolean eof = false; MiniSeed ms = null; int npur = 0; ArrayList<MiniSeed> blks = new ArrayList<MiniSeed>(100); while (!eof) { try { // Try to read a mini-seed, if it failes mark eof if (read(in, b, 0, (options.gapsonly ? 
64 : 512))) { if (b[0] == '<' && b[1] == 'E' && b[2] == 'O' && b[3] == 'R' && b[4] == '>') { eof = true; ms = null; logger.fine("EOR found"); } else { ms = new MiniSeed(b); logger.finest("" + ms); if (!options.gapsonly && ms.getBlockSize() != 512) { read(in, b, 512, ms.getBlockSize() - 512); ms = new MiniSeed(b); } iblk++; totblks++; } } else { eof = true; // still need to process this last channel THIS SHOULD NEVER HAPPEN unless socket is lost ms = null; logger.warning(" *** Unexpected EOF Found"); if (out != null) { System.exit(1); // error out with no file } } if (perfStart) { msCommand += (System.currentTimeMillis() - startPhase); startPhase = System.currentTimeMillis(); perfStart = false; } logger.finest(iblk + " " + ms); if (!options.quiet && iblk % 1000 == 0 && iblk > 0) { // This is a user-feedback counter. System.out.print("\r \r" + iblk + "..."); } if (eof || (nscl != null && (ms == null ? true : nsclComparator.compare(nscl, NSCL.stringToNSCL(ms.getSeedName())) != 0))) { msTransfer += (System.currentTimeMillis() - startPhase); startPhase = System.currentTimeMillis(); if (!options.quiet) { // TODO could go into a helper method int nsgot = 0; if (blks.size() > 0) { Collections.sort(blks); logger.finer(blks.size() + " " + iblk); for (int i = 0; i < blks.size(); i++) { nsgot += (blks.get(i)).getNsamp(); } logger.finest("" + (MiniSeed) blks.get(blks.size() - 1)); System.out.print('\r'); DateTime dt = new DateTime().withZone(DateTimeZone.forID("UTC")); logger.info(hmsFormat.print(dt.getMillis()) + " Query on " + nscl + " " + df6.format(blks.size()) + " mini-seed blks " + (blks.get(0) == null ? "Null" : ((MiniSeed) blks.get(0)).getTimeString()) + " " + (blks.get((blks.size() - 1)) == null ? 
"Null" : (blks.get(blks.size() - 1)).getEndTimeString()) + " " + " ns=" + nsgot); } else { System.out.print('\r'); logger.info("Query on " + options.getSeedname() + " returned 0 blocks!"); } } if (blks.size() > 0) { MiniSeed ms2 = blks.get(0); if (out == null) { // Get the array list output ArrayList<MiniSeed> newBlks = new ArrayList<MiniSeed>(blks.size()); for (int i = 0; i < blks.size(); i++) { newBlks.add(i, blks.get(i)); } blksAll.add(newBlks); } else { // create the output file if (options.getType() == OutputType.ms || options.getType() == OutputType.dcc || options.getType() == OutputType.dcc512 || options.getType() == OutputType.msz) { filename = Filename.makeFilename(options.filemask, nscl, ms2); } else { filename = Filename.makeFilename(options.filemask, nscl, options.getBegin()); } //filename = lastComp; // TODO - should happen in the makeFilename methods. filename = filename.replaceAll(" ", "_"); logger.finest(((MiniSeed) blks.get(0)).getTimeString() + " to " + ((MiniSeed) blks.get(blks.size() - 1)).getTimeString() + " " + (((MiniSeed) blks.get(0)).getGregorianCalendar().getTimeInMillis() - ((MiniSeed) blks.get(blks.size() - 1)) .getGregorianCalendar().getTimeInMillis()) / 1000L); // Due to a foul up in data in Nov, Dec 2006 it is possible the Q330s got the // same baler block twice, but the last 7 512's of the block zeroed and the other // correct. Find these and purge the bad ones. 
if (!options.gapsonly) { for (int i = blks.size() - 1; i >= 0; i--) { if (blks.get(i).getBlockSize() == 4096 && // Has to be a big block or it does not happen blks.get(i).getGregorianCalendar() .compareTo(jan_01_2007) < 0 && blks.get(i).getUsedFrameCount() < blks.get(i) .getB1001FrameCount() && blks.get(i).getUsedFrameCount() <= 7 && blks.get(i).getB1001FrameCount() > 7) { blks.remove(i); npur++; } } } logger.finer("Found " + npur + " recs with on first block of 4096 valid"); blks.trimToSize(); //for(int i=0; i<blks.size(); i++) logger.finest(((MiniSeed) blks.get(i)).toString()); // TODO: Change the signature to pass options only once. out.makeFile(nscl, filename, blks); } } maxTime = 0; if (blks.size() > 0) { blks.clear(); System.gc(); // Lots of memory just abandoned. Try garbage collector } msOutput += (System.currentTimeMillis() - startPhase); startPhase = System.currentTimeMillis(); } // If this block is the first in a new component, clear the blks array //if(!lastComp.substring(0,compareLength).equals( // ms.getSeedName().substring(0,compareLength))) blks.clear(); /* in late 2007 there was some files which were massively duplicated by block. * to prevent this from blowing memory when there are so may we eliminate and duplicate * blocks here. If it is massively out of order , all of these block checks will slow things * down. **/ boolean isDuplicate = false; if (ms != null) { if (ms.getTimeInMillis() <= maxTime) { // No need to check duplicates if this is newest seen if (!options.gapsonly) { if (blks.size() >= 1) { for (int i = blks.size() - 1; i >= 0; i--) { if (ms.isDuplicate(blks.get(i))) { isDuplicate = true; break; } } } } if (!isDuplicate && ms.getIndicator().compareTo("D ") >= 0) { blks.add(ms); } else { ndups++; } } else { if (ms.getIndicator().compareTo("D ") >= 0) { blks.add(ms); // If its not D or better, its been zapped! 
} maxTime = ms.getTimeInMillis(); } nscl = NSCL.stringToNSCL(ms.getSeedName()); } } catch (IllegalSeednameException e) { logger.severe("Seedname exception making a seed record e=" + e.getMessage()); } } // while(!eof) if (!options.quiet && iblk > 0) { logger.info(iblk + " Total blocks transferred in " + (System.currentTimeMillis() - startTime) + " ms " + (iblk * 1000L / Math.max(System.currentTimeMillis() - startTime, 1)) + " b/s " + npur + " #dups=" + ndups); } if (out == null) { return blksAll; // If called in no file output mode, return the blocks } blks.clear(); } catch (UnknownHostException e) { logger.severe("EQC main: Host is unknown=" + options.host + "/" + options.port); if (out != null) { System.exit(1); } return null; } catch (IOException e) { if (e.getMessage().equalsIgnoreCase("Connection refused")) { logger.severe( "The connection was refused. Server is likely down or is blocked. This should never happen."); return null; } else { logger.severe(e + " EQC main: IO error opening/reading socket=" + options.host + "/" + options.port); if (out != null) { System.exit(1); } } } } // End of readline outtcp.write("\n".getBytes()); // Send end of request marker to query if (ds.isClosed()) { try { ds.close(); } catch (IOException e) { } } if (options.perfMonitor) { long msEnd = System.currentTimeMillis() - startPhase; logger.info("Perf setup=" + msSetup + " connect=" + msConnect + " Cmd=" + msCommand + " xfr=" + msTransfer + " out=" + msOutput + " last=" + msEnd + " tot=" + (msSetup + msConnect + msTransfer + msOutput + msEnd) + " #blks=" + totblks + " #lines=" + nline); } return null; } catch (IOException e) { logger.severe(e + " IOError reading input lines."); } return null; } public static boolean read(InputStream in, byte[] b, int off, int l) throws IOException { int len; while ((len = in.read(b, off, l)) > 0) { off += len; l -= len; if (l == 0) { return true; } } return false; } public static void main(String[] args) { if (args.length == 0) { 
System.out.println(QueryProperties.getUsage()); System.exit(1); } // Load a default logging properties file if none already set. String customLogConfigFile = System.getProperty("java.util.logging.config.file"); if (customLogConfigFile == null) { // Use default logging try { InputStream configFile = ClassLoader.getSystemResourceAsStream("logging.properties"); LogManager.getLogManager().readConfiguration(configFile); } catch (IOException ex) { logger.severe("Failed to open configuration file, logging not configured."); } logger.config("Using default logging configuration."); } else { logger.config("Using custom logging config file: " + customLogConfigFile); } //TODO this any any others should get explicitly set on calendars. "UTC"? TimeZone tz = TimeZone.getTimeZone("GMT+0"); TimeZone.setDefault(tz); logger.finest("Running Edge Query"); EdgeQueryOptions options = new EdgeQueryOptions(args); CWBHoldingsServerImpl cwbServer = new CWBHoldingsServerImpl(options.host, options.port); // The ls option does not require any args checking if (options.isListQuery()) { logger.info(cwbServer.listChannels(options.getBegin(), options.getDuration())); } else { query(options); } } }