// Java tutorial
/* * EDUnits Copyright 2009, NOAA. * See the LICENSE.txt file in this file's directory. */ package ucar.nc2.ogc.erddap.util; import com.google.common.math.DoubleMath; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; /** * This class has static methods to convert units from one standard to another. */ public class ErddapEDUnits { /** * UDUNITS and UCUM support metric prefixes. */ public static String metricName[] = { "yotta", "zetta", "exa", "peta", "tera", "giga", "mega", "kilo", "hecto", "deka", "deci", "centi", "milli", "micro", "nano", "pico", "femto", "atto", "zepto", "yocto", "", }; public static String metricAcronym[] = { "Y", "Z", "E", "P", "T", "G", "M", "k", "h", "da", "d", "c", "m", "u", "n", "p", "f", "a", "z", "y", "u" }; public static int nMetric = metricName.length; /** * UCUM supports power-of-two prefixes, but UDUNITS doesn't. */ public static String twoAcronym[] = { "Ki", "Mi", "Gi", "Ti" }; public static String twoValue[] = { "1024", "1048576", "1073741824", "1.099511627776e12" }; public static int nTwo = twoAcronym.length; private static final HashMap<String, String> udHashMap; private static final HashMap<String, String> ucHashMap; static { try (InputStream udunitsToUcumStream = ErddapEDUnits.class.getResourceAsStream("UdunitsToUcum.properties"); InputStream ucumToUdunitsStream = ErddapEDUnits.class .getResourceAsStream("UcumToUdunits.properties")) { udHashMap = getHashMapStringString(udunitsToUcumStream, "ISO-8859-1"); ucHashMap = getHashMapStringString(ucumToUdunitsStream, "ISO-8859-1"); } catch (IOException e) { throw new ExceptionInInitializerError(e); } } /** * This converts UDUnits to UCUM. 
* <br>UDUnits: http://www.unidata.ucar.edu/software/udunits/udunits-1/etc/udunits.dat * http://www.unidata.ucar.edu/software/udunits/udunits-2/udunits2.html * I worked with v 2.1.9 * <br>UCUM: http://unitsofmeasure.org/ucum.html * I worked with Version: 1.8, $Revision: 28894 $ * <p/> * <p>UDUnits supports lots of aliases (short and long) * and plurals (usually by adding 's'). * These all get reduced to UCUM's short and canonical-only units. * <p/> * <p>Notes: * <ul> * <li>This method is a strictly case sensitive. * The only UDUnits that should be capitalized (other than acronyms) are * Btu, Gregorian..., Julian..., PI. * <br>The only UDUnits that may be capitalized are * Celsius, Fahrenheit, Kelvin, Rankine. * <li>For "10 to the", UCUM allows 10* or 10^. This method uses 10^. * <li>NTU becomes {ntu}. * <li>PSU or psu becomes {psu}. * </ul> * <p/> * return the UDUnits converted to UCUM. * null returns null. "" returns "". * throws Exception if trouble. */ public static String udunitsToUcum(String udunits) { if (udunits == null) { return null; } //is it a point in time? e.g., seconds since 1970-01-01T00:00:00T int sincePo = udunits.indexOf(" since "); if (sincePo > 0) { try { //test if really appropriate double baf[] = ErddapCalendar2.getTimeBaseAndFactor(udunits); //throws exception if trouble //use 'factor', since it is more forgiving than udunitsToUcum converter String u; if (DoubleMath.fuzzyEquals(baf[1], 0.001, 1e-6)) { // Can't simply do "baf[1] == 0.001". u = "ms"; } else if (baf[1] == 1) { u = "s"; } else if (baf[1] == ErddapCalendar2.SECONDS_PER_MINUTE) { u = "min"; } else if (baf[1] == ErddapCalendar2.SECONDS_PER_HOUR) { u = "h"; } else if (baf[1] == ErddapCalendar2.SECONDS_PER_DAY) { u = "d"; } else if (baf[1] == 30 * ErddapCalendar2.SECONDS_PER_DAY) { // mo_j ? u = "mo"; } else if (baf[1] == 360 * ErddapCalendar2.SECONDS_PER_DAY) { // a_j ? u = "a"; } else { u = udunitsToUcum(udunits.substring(0, sincePo)); //shouldn't happen, but weeks? microsec? 
} //make "s{since 1970-01-01T00:00:00T} return u + "{" + udunits.substring(sincePo + 1) + "}"; } catch (Exception e) { } } //parse udunits and build ucum, till done StringBuilder ucum = new StringBuilder(); int udLength = udunits.length(); int po = 0; //po is next position to be read while (po < udLength) { char ch = udunits.charAt(po); //letter if (isUdunitsLetter(ch)) { //includes '' and '' //find contiguous letters|_|digit (no '-') int po2 = po + 1; while (po2 < udLength && (isUdunitsLetter(udunits.charAt(po2)) || udunits.charAt(po2) == '_' || ErddapString2.isDigit(udunits.charAt(po2)))) { po2++; } String tUdunits = udunits.substring(po, po2); po = po2; //some udunits have internal digits, but none end in digits //if it ends in digits, treat as exponent //find contiguous digits at end int firstDigit = tUdunits.length(); while (firstDigit >= 1 && ErddapString2.isDigit(tUdunits.charAt(firstDigit - 1))) { firstDigit--; } String exponent = tUdunits.substring(firstDigit); tUdunits = tUdunits.substring(0, firstDigit); String tUcum = oneUdunitsToUcum(tUdunits); //deal with PER -> / if (tUcum.equals("/")) { char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1); if (lastUcum == '/') { ucum.setCharAt(ucum.length() - 1, '.'); //2 '/' cancel out } else if (lastUcum == '.') { ucum.setCharAt(ucum.length() - 1, '/'); // '/' replaces '.' } else { ucum.append('/'); } } else { ucum.append(tUcum); } //add the exponent ucum.append(exponent); //catch -exponent as a number below continue; } //number if (ch == '-' || ErddapString2.isDigit(ch)) { //find contiguous digits int po2 = po + 1; while (po2 < udLength && ErddapString2.isDigit(udunits.charAt(po2))) { po2++; } //decimal place + digit (not just .=multiplication) boolean hasDot = false; if (po2 < udLength - 1 && udunits.charAt(po2) == '.' 
&& ErddapString2.isDigit(udunits.charAt(po2 + 1))) { hasDot = true; po2 += 2; while (po2 < udLength && ErddapString2.isDigit(udunits.charAt(po2))) { po2++; } } //exponent? e- or e{digit} boolean hasE = false; if (po2 < udLength - 1 && Character.toLowerCase(udunits.charAt(po2)) == 'e' && (udunits.charAt(po2 + 1) == '-' || ErddapString2.isDigit(udunits.charAt(po2 + 1)))) { hasE = true; po2 += 2; while (po2 < udLength && ErddapString2.isDigit(udunits.charAt(po2))) { po2++; } } String num = udunits.substring(po, po2); po = po2; //convert floating point to rational number if (hasDot || hasE) { int rational[] = ErddapString2.toRational(ErddapString2.parseDouble(num)); if (rational[1] == Integer.MAX_VALUE) { ucum.append(num); //ignore the trouble !!! ??? } else if (rational[1] == 0) //includes {0, 0} { ucum.append(rational[0]); } else { ucum.append(rational[0]).append(".10^").append(rational[1]); } } else { //just copy num ucum.append(num); } continue; } //space or . or (183) (multiplication) if (ch == ' ' || ch == '.' || ch == 183) { char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1); if (lastUcum == '/' || lastUcum == '.') { //if last token was / or ., do nothing } else { ucum.append('.'); } po++; continue; } // * (multiplication * or exponent **) if (ch == '*') { po++; if (po < udLength && udunits.charAt(po) == '*') { ucum.append('^'); // exponent: ** -> ^ po++; } else { char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1); if (lastUcum == '/' || lastUcum == '.') { //if last token was / or ., do nothing } else { ucum.append('.'); } } continue; } // / if (ch == '/') { po++; char lastUcum = ucum.length() == 0 ? '\u0000' : ucum.charAt(ucum.length() - 1); if (lastUcum == '/') { ucum.setCharAt(ucum.length() - 1, '.'); // 2 '/' cancel out } else if (lastUcum == '.') { ucum.setCharAt(ucum.length() - 1, '/'); // '/' replaces '.' 
} else { ucum.append('/'); } continue; } // " if (ch == '\"') { po++; ucum.append("''"); continue; } //otherwise, punctuation. copy it ucum.append(ch); po++; } return ucum.toString(); } private static boolean isUdunitsLetter(char ch) { return ErddapString2.isLetter(ch) || ch == '' || ch == ''; } /** * This converts one udunits term (perhaps with metric prefix(es)) to the corresponding ucum string. * If udunits is just metric prefix(es), this returns the prefix acronym(s) with "{count}" as suffix * (e.g., dkilo returns dk{count}). * If this can't completely convert udunits, it returns the original udunits * (e.g., kiloBobs remains kiloBobs (to avoid 'exact' becoming 'ect' ). */ private static String oneUdunitsToUcum(String udunits) { //repeatedly pull off start of udunits and build ucum, till done String oldUdunits = udunits; StringBuilder ucum = new StringBuilder(); MAIN: while (true) { //try to find udunits in hashMap String tUcum = udHashMap.get(udunits); if (tUcum != null) { //success! done! ucum.append(tUcum); return ucum.toString(); } //try to separate out a metricName prefix (e.g., "kilo") for (int p = 0; p < nMetric; p++) { if (udunits.startsWith(metricName[p])) { udunits = udunits.substring(metricName[p].length()); ucum.append(metricAcronym[p]); if (udunits.length() == 0) { ucum.append("{count}"); return ucum.toString(); } continue MAIN; } } //try to separate out a metricAcronym prefix (e.g., "k") for (int p = 0; p < nMetric; p++) { if (udunits.startsWith(metricAcronym[p])) { udunits = udunits.substring(metricAcronym[p].length()); ucum.append(metricAcronym[p]); if (udunits.length() == 0) { ucum.append("{count}"); return ucum.toString(); } continue MAIN; } } return oldUdunits; } } /** * This converts UCUM to UDUnits. 
* <br>UDUnits: http://www.unidata.ucar.edu/software/udunits/udunits-1/etc/udunits.dat * http://www.unidata.ucar.edu/software/udunits/udunits-2/udunits2.html * <br>UCUM: http://unitsofmeasure.org/ucum.html * <p/> * <p>UCUM tends to be short, canonical-only, and strict. * Many UCUM units are the same in UDUnits. * <p/> * <p>UDUnits supports lots of aliases (short and long) * and plurals (usually by adding 's'). * This tries to convert UCUM to a short, common UDUNIT units. * <p/> * <p>Problems: * <ul> * <li> UCUM has only "deg", no concept of degree_east|north|true|true. * </ul> * <p/> * <p>Notes: * <ul> * <li>This method is a strictly case sensitive. * <li>For "10 to the", UCUM allows 10* or 10^. This method uses 10^. * <li>{ntu} becomes NTU. * <li>{psu} becomes PSU. * </ul> * <p/> * return the UCUM converted to UDUNITS. * null returns null. "" returns "". */ public static String ucumToUdunits(String ucum) { if (ucum == null) { return null; } StringBuilder udunits = new StringBuilder(); int ucLength = ucum.length(); if (ucLength == 0) { return ""; } //is it a time point? e.g., s{since 1970-01-01T00:00:00T} if (ucum.charAt(ucLength - 1) == '}' && //quick reject ucum.indexOf('}') == ucLength - 1) { //reasonably quick reject int sincePo = ucum.indexOf("{since "); if (sincePo > 0) { //is first part an atomic ucum unit? 
String tUdunits = ucHashMap.get(ucum.substring(0, sincePo)); if (tUdunits != null) { return tUdunits + " " + ucum.substring(sincePo + 1, ucLength - 1); } } } //parse ucum and build udunits, till done int po = 0; //po is next position to be read while (po < ucLength) { char ch = ucum.charAt(po); //letter if (isUcumLetter(ch)) { //includes [, ], {, }, '' and "'" //find contiguous letters|_|digit (no '-') int po2 = po + 1; while (po2 < ucLength && (isUcumLetter(ucum.charAt(po2)) || ucum.charAt(po2) == '_' || ErddapString2.isDigit(ucum.charAt(po2)))) { po2++; } String tUcum = ucum.substring(po, po2); po = po2; //some ucum have internal digits, but none end in digits //if it ends in digits, treat as exponent //find contiguous digits at end int firstDigit = tUcum.length(); while (firstDigit >= 1 && ErddapString2.isDigit(tUcum.charAt(firstDigit - 1))) { firstDigit--; } String exponent = tUcum.substring(firstDigit); tUcum = tUcum.substring(0, firstDigit); String tUdunits = oneUcumToUdunits(tUcum); //deal with PER -> / if (tUdunits.equals("/")) { char lastUdunits = udunits.length() == 0 ? '\u0000' : udunits.charAt(udunits.length() - 1); if (lastUdunits == '/') { udunits.setCharAt(udunits.length() - 1, '.'); //2 '/' cancel out } else if (lastUdunits == '.') { udunits.setCharAt(udunits.length() - 1, '/'); // '/' replaces '.' 
} else { udunits.append('/'); } } else { udunits.append(tUdunits); } //add the exponent udunits.append(exponent); //catch -exponent as a number below continue; } //number if (ch == '-' || ErddapString2.isDigit(ch)) { //find contiguous digits int po2 = po + 1; while (po2 < ucLength && ErddapString2.isDigit(ucum.charAt(po2))) { po2++; } // ^- or ^{digit} if (po2 < ucLength - 1 && Character.toLowerCase(ucum.charAt(po2)) == '^' && (ucum.charAt(po2 + 1) == '-' || ErddapString2.isDigit(ucum.charAt(po2 + 1)))) { po2 += 2; while (po2 < ucLength && ErddapString2.isDigit(ucum.charAt(po2))) { po2++; } } String num = ucum.substring(po, po2); po = po2; udunits.append(num); continue; } // . if (ch == '.') { po++; udunits.append(' '); // ' ' is more common than '.' in udunits continue; } // * if (ch == '*') { po++; udunits.append('^'); continue; } // ' '' if (ch == '\'') { po++; if (po < ucLength && ucum.charAt(po) == '\'') { udunits.append("arc_second"); po++; } else { udunits.append("arc_minute"); } continue; } //otherwise, punctuation. copy it // / (division), " doesn't occur, udunits.append(ch); po++; } return udunits.toString(); } private static boolean isUcumLetter(char ch) { return ErddapString2.isLetter(ch) || ch == '[' || ch == ']' || ch == '{' || ch == '}' || ch == '' || ch == '\''; } /** * This converts one ucum term (perhaps with metric prefix(es)) * ( to the corresponding udunits string. * If ucum is just metric prefix(es), this returns the metric prefix * acronym(s) with "{count}" as suffix (e.g., dkilo returns dk{count}). * If this can't completely convert ucum, it returns the original ucum * (e.g., kiloBobs remains kiloBobs (to avoid 'exact' becoming 'ect' ). 
*/ private static String oneUcumToUdunits(String ucum) { //repeatedly pull off start of ucum and build udunits, till done String oldUcum = ucum; StringBuilder udunits = new StringBuilder(); MAIN: while (true) { //try to find ucum in hashMap String tUdunits = ucHashMap.get(ucum); if (tUdunits != null) { //success! done! udunits.append(tUdunits); return udunits.toString(); } //try to separate out a metricAcronym prefix (e.g., "k") for (int p = 0; p < nMetric; p++) { if (ucum.startsWith(metricAcronym[p])) { ucum = ucum.substring(metricAcronym[p].length()); udunits.append(metricAcronym[p]); if (ucum.length() == 0) { udunits.append("{count}"); return udunits.toString(); } continue MAIN; } } //try to separate out a twoAcronym prefix (e.g., "Ki") for (int p = 0; p < nTwo; p++) { if (ucum.startsWith(twoAcronym[p])) { ucum = ucum.substring(twoAcronym[p].length()); char udch = udunits.length() > 0 ? udunits.charAt(udunits.length() - 1) : '\u0000'; if (udch != '\u0000' && udch != '.' && udch != '/') { udunits.append('.'); } if (ucum.length() == 0) { udunits.append("{count}"); return udunits.toString(); } udunits.append(twoValue[p]).append("."); continue MAIN; } } //ends in comment? try to just convert the beginning int po1 = oldUcum.lastIndexOf('{'); if (po1 > 0 && oldUcum.endsWith("}")) { return oneUcumToUdunits(oldUcum.substring(0, po1)) + oldUcum.substring(po1); } return oldUcum; } } /** * Reads the contents of {@code inputStream} into a HashMap. Each key-value pair in the input will result in an * entry in the map. * <p/> * The specified stream remains open after this method returns. * * @param inputStream a stream with line-based, key-value pairs. * @param charset the name of a supported {@link java.nio.charset.Charset charset}. * @return a HashMap initialized from the stream. 
* @throws java.io.IOException if an I/O error occurs */ public static HashMap<String, String> getHashMapStringString(InputStream inputStream, String charset) throws IOException { HashMap<String, String> ht = new HashMap<>(); ErddapStringArray sa = ErddapStringArray.fromInputStream(inputStream, charset); int n = sa.size(); int i = 0; while (i < n) { String s = sa.get(i++); if (s.startsWith("#")) { continue; } while (i < n && s.endsWith("\\")) { s = s.substring(0, s.length() - 1) + sa.get(i++); } int po = s.indexOf('='); if (po < 0) { continue; } //new String: so not linked to big source file's text ht.put(s.substring(0, po).trim(), s.substring(po + 1).trim()); } return ht; } }