Java tutorial
/* * MHAP package * * This software is distributed "as is", without any warranty, including * any implied warranty of merchantability or fitness for a particular * use. The authors assume no responsibility for, and shall not be liable * for, any special, indirect, or consequential damages, or any damages * whatsoever, arising out of or in connection with the use of this * software. * * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren * University Of Maryland * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package edu.umd.marbl.mhap.utils; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.nio.ByteBuffer; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Random; import org.apache.lucene.util.OpenBitSet; import com.google.common.hash.HashCode; import com.google.common.hash.HashFunction; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import edu.umd.marbl.mhap.main.MhapMain; public final class Utils { private static OpenBitSet validKmerHashes; public enum ToProtein { AAA("K"), AAC("N"), AAG("K"), AAT("N"), ACA("T"), ACC("T"), ACG("T"), ACT("T"), AGA("R"), AGC("S"), AGG( "R"), AGT("S"), ATA("I"), ATC("I"), ATG("M"), ATT("I"), CAA("Q"), CAC("H"), CAG("Q"), CAT("H"), CCA( "P"), CCC("P"), CCG("P"), CCT("P"), CGA("R"), CGC("R"), CGG("R"), CGT("R"), CTA("L"), CTC( "L"), CTG("L"), CTT("L"), GAA("E"), GAC("D"), GAG("E"), GAT("D"), GCA("A"), GCC( "A"), GCG("A"), GCT("A"), GGA("G"), GGC("G"), GGG("G"), GGT("G"), GTA( "V"), GTC("V"), GTG("V"), GTT("V"), TAA("X"), TAC("Y"), TAG( "X"), TAT("Y"), TCA("S"), TCC("S"), TCG("S"), TCT("S"), TGA( "X"), TGC("C"), TGG("W"), TGT("C"), TTA( "L"), TTC("F"), TTG("L"), TTT("F"); /* * Ala/A GCU, GCC, GCA, GCG Leu/L UUA, UUG, CUU, CUC, CUA, CUG Arg/R * CGU, CGC, CGA, CGG, AGA, AGG Lys/K AAA, AAG Asn/N AAU, AAC Met/M AUG * Asp/D GAU, GAC Phe/F UUU, UUC Cys/C UGU, UGC Pro/P CCU, CCC, CCA, CCG * Gln/Q CAA, CAG Ser/S UCU, UCC, UCA, UCG, AGU, AGC Glu/E GAA, GAG * Thr/T ACU, ACC, ACA, ACG Gly/G GGU, GGC, GGA, GGG Trp/W UGG His/H * CAU, CAC Tyr/Y UAU, UAC Ile/I AUU, AUC, AUA Val/V GUU, GUC, GUA, GUG * START AUG STOP UAG, UGA, UAA */ private String other; ToProtein(String other) { this.other = other; } public String getProtein() { return this.other; } } public enum Translate { A("T"), B("V"), C("G"), D("H"), G("C"), H("D"), K("M"), M("K"), N("N"), R("Y"), S("S"), T("A"), V("B"), W( "W"), Y("R"); private String other; Translate(String other) { this.other = other; } public String getCompliment() { return this.other; } } public static final int BUFFER_BYTE_SIZE = 8388608; // 8MB public static final NumberFormat DECIMAL_FORMAT = new DecimalFormat("############.########"); public static final int FASTA_LINE_LENGTH = 60; public static final int MBYTES = 1048576; public static int checkForEnd(String line, int brackets) { if (line.startsWith("{")) { brackets++; } if (line.startsWith("}")) { brackets--; } if (brackets == 0) { return -1; } return brackets; } public static long[] computeHashes(String item, int numWords, int seed) { long[] hashes = new long[numWords]; for (int word = 0; word < numWords; word += 2) { HashFunction hashFunc = Hashing.murmur3_128(seed + word); Hasher hasher = hashFunc.newHasher(); hasher.putUnencodedChars(item); // get the two longs out HashCode hc = hasher.hash(); ByteBuffer bb = ByteBuffer.wrap(hc.asBytes()); hashes[word] = bb.getLong(0); if (word + 1 < numWords) hashes[word + 1] = bb.getLong(8); } return hashes; } public final static int[] computeHashesInt(Object obj, int numWords, int seed) { if (obj instanceof Integer) return computeHashesIntInt((Integer) obj, numWords, seed); if (obj instanceof Long) return computeHashesIntLong((Long) obj, numWords, seed); if (obj instanceof Double) return computeHashesIntDouble((Double) obj, numWords, seed); if (obj instanceof Float) return computeHashesIntFloat((Float) obj, numWords, seed); if (obj instanceof String) return computeHashesIntString((String) obj, numWords, seed); throw new MhapRuntimeException("Cannot hash class type " + obj.getClass().getCanonicalName()); } public final static int[] computeHashesIntDouble(double obj, int numWords, int seed) { int[] hashes = new int[numWords]; HashFunction hf = Hashing.murmur3_32(seed); for (int iter = 0; iter < numWords; iter++) { HashCode hc = hf.newHasher().putDouble(obj).putInt(iter).hash(); hashes[iter] = hc.asInt(); } return hashes; } public final static int[] computeHashesIntFloat(float obj, int numWords, int seed) { int[] hashes = new int[numWords]; HashFunction hf = Hashing.murmur3_32(seed); for (int iter = 0; iter < numWords; iter++) { HashCode hc = hf.newHasher().putFloat(obj).putInt(iter).hash(); hashes[iter] = hc.asInt(); } return hashes; } public final static int[] computeHashesIntInt(int obj, int numWords, int seed) { int[] hashes = new int[numWords]; HashFunction hf = Hashing.murmur3_32(seed); for (int iter = 0; iter < numWords; iter++) { HashCode hc = hf.newHasher().putInt(obj).putInt(iter).hash(); hashes[iter] = hc.asInt(); } return hashes; } public final static int[] computeHashesIntLong(long obj, int numWords, int seed) { int[] hashes = new int[numWords]; HashFunction hf = Hashing.murmur3_32(seed); for (int iter = 0; iter < numWords; iter++) { HashCode hc = hf.newHasher().putLong(obj).putInt(iter).hash(); hashes[iter] = hc.asInt(); } return hashes; } public final static int[] computeHashesIntString(String obj, int numWords, int seed) { int[] hashes = new int[numWords]; HashFunction hf = Hashing.murmur3_32(seed); for (int iter = 0; iter < numWords; iter++) { HashCode hc = hf.newHasher().putUnencodedChars(obj).putInt(iter).hash(); hashes[iter] = hc.asInt(); } return hashes; } public final static long[][] computeKmerHashes(final String seq, final int kmerSize, final int numWords, final int seed) { final int numberKmers = seq.length() - kmerSize + 1; if (numberKmers < 1) throw new MhapRuntimeException("Kmer size bigger than string length."); // get the rabin hashes final long[] rabinHashes = computeSequenceHashesLong(seq, kmerSize, seed); final long[][] hashes = new long[rabinHashes.length][numWords]; // Random rand = new Random(0); for (int iter = 0; iter < rabinHashes.length; iter++) { // rand.setSeed(rabinHashes[iter]); long x = rabinHashes[iter]; for (int word = 0; word < numWords; word++) { // hashes[iter][word] = rand.nextLong(); // XORShift Random Number Generators x ^= (x << 21); x ^= (x >>> 35); x ^= (x << 4); hashes[iter][word] = x; } } return hashes; } public final static long[][] computeKmerHashesExact(final String seq, final int kmerSize, final int numWords, final int seed) { HashFunction hf = Hashing.murmur3_128(seed); long[][] hashes = new long[seq.length() - kmerSize + 1][numWords]; for (int iter = 0; iter < hashes.length; iter++) { String subStr = seq.substring(iter, iter + kmerSize); for (int word = 0; word < numWords; word++) { HashCode hc = hf.newHasher().putUnencodedChars(subStr).putInt(word).hash(); hashes[iter][word] = hc.asLong(); } } return hashes; } public final static int[] computeSequenceHashes(final String seq, final int kmerSize) { // RollingSequenceHash rabinHash = new RollingSequenceHash(kmerSize); // final int[] rabinHashes = rabinHash.hashInt(seq); HashFunction hf = Hashing.murmur3_32(0); int[] hashes = new int[seq.length() - kmerSize + 1]; for (int iter = 0; iter < hashes.length; iter++) { HashCode hc = hf.newHasher().putUnencodedChars(seq.substring(iter, iter + kmerSize)).hash(); hashes[iter] = hc.asInt(); } return hashes; } public final static long[] computeSequenceHashesLongValidKmers(final String seq, final int kmerSize, final int seed) { HashFunction hf = Hashing.murmur3_128(seed); long[] hashes = new long[seq.length() - kmerSize + 1]; for (int iter = 0; iter < hashes.length; iter++) { String subSeq = seq.substring(iter, iter + kmerSize); long hashYGS; hashes[iter] = Long.MAX_VALUE; if (!subSeq.contains("N")) { hashYGS = computeHashYGS(subSeq); //if(MhapMain.getValidKmersHashes().get(hashYGS)) //Bernardo if (MhapMain.getValidKmersHashes().fastGet(hashYGS)) { HashCode hc = hf.newHasher().putUnencodedChars(subSeq).hash(); hashes[iter] = hc.asLong(); } } } return hashes; } public final static long[] computeSequenceHashesLong(final String seq, final int kmerSize, final int seed) { HashFunction hf = Hashing.murmur3_128(seed); long[] hashes = new long[seq.length() - kmerSize + 1]; for (int iter = 0; iter < hashes.length; iter++) { HashCode hc = hf.newHasher().putUnencodedChars(seq.substring(iter, iter + kmerSize)).hash(); hashes[iter] = hc.asLong(); } return hashes; } // add new line breaks every FASTA_LINE_LENGTH characters public final static String convertToFasta(String supplied) { StringBuffer converted = new StringBuffer(); int i = 0; String[] split = supplied.trim().split("\\s+"); if (split.length > 1) { // process as a qual int size = 0; for (i = 0; i < split.length; i++) { converted.append(split[i]); size += split[i].length(); if (i != (split.length - 1)) { if (size >= FASTA_LINE_LENGTH) { size = 0; converted.append("\n"); } else { converted.append(" "); } } } } else { for (i = 0; (i + FASTA_LINE_LENGTH) < supplied.length(); i += FASTA_LINE_LENGTH) { converted.append(supplied.substring(i, i + FASTA_LINE_LENGTH)); converted.append("\n"); } converted.append(supplied.substring(i, supplied.length())); } return converted.toString(); } public final static int countLetterInRead(String fasta, String letter) { return countLetterInRead(fasta, letter, false); } public final static int countLetterInRead(String fasta, String letter, Boolean caseSensitive) { String ungapped = Utils.getUngappedRead(fasta); int len = ungapped.length(); if (len == 0) { return -1; } int increment = letter.length(); int count = 0; for (int i = 0; i <= ungapped.length() - increment; i += increment) { if (letter.equals(ungapped.substring(i, i + increment)) && caseSensitive) { count++; } if (letter.equalsIgnoreCase(ungapped.substring(i, i + increment)) && !caseSensitive) { count++; } } return count; } public final static HashSet<Long> createKmerFilter(String fileName, double maxPercent, int kmerSize, int seed) throws IOException { File file = new File(fileName); // make sure don't leak resources try (BufferedReader bf = new BufferedReader(new FileReader(file), BUFFER_BYTE_SIZE);) { // generate hashset ArrayList<Long> filterArray = new ArrayList<Long>(); String line = bf.readLine(); while (line != null) { String[] str = line.split("\\s+", 3); if (str.length < 2) throw new MhapRuntimeException( "Kmer filter file must have at least two column [kmer kmer_percent]."); double percent = Double.parseDouble(str[1]); // if greater, add to hashset if (percent > maxPercent) { long[] minHash = Utils.computeSequenceHashesLong(str[0], kmerSize, seed); if (minHash.length > 1) System.err.println("Warning filter file kmer size larger than setting!"); for (long val : minHash) filterArray.add(val); } else break; // read the next line line = bf.readLine(); } return new HashSet<Long>(filterArray); } } public static void createValidKmerFilter(String validKmersFile, int kmerSize, int i) throws FileNotFoundException, IOException { File file = new File(validKmersFile); /* java Integer.MAX_VALUE = 2,147,483,647 BitSet must accommodate 16 T: TTTTTTTTTTTTTTTT = binary 11111111111111111111111111111111 = decimal 4,294,967,295 = 2* Integer.MAX_VALUE + 1 */ long numBits = 2 * (long) Integer.MAX_VALUE + 1; validKmerHashes = new OpenBitSet(numBits); String kmer, kmer_rc; int seed = 0; long kmerHash; checkValidKmersFile(file, kmerSize); try (BufferedReader bf = new BufferedReader(new FileReader(file), BUFFER_BYTE_SIZE);) { String line = bf.readLine(); while (line != null) { kmerHash = Utils.computeHashYGS(line); validKmerHashes.fastSet(kmerHash); kmer_rc = Utils.rc(line); kmerHash = Utils.computeHashYGS(kmer_rc); validKmerHashes.fastSet(kmerHash); line = bf.readLine(); } } } private static void checkValidKmersFile(File file, int kmerSize) throws FileNotFoundException, IOException { try (BufferedReader bf = new BufferedReader(new FileReader(file));) { String line = bf.readLine(); int counter = 1; while (line != null && counter <= 100) { String[] str = line.split("\\s+"); if (str.length > 1) { throw new MhapRuntimeException( "Valid kmer file must have at most one column [kmer]. Offending line: " + counter + ". With text: " + line); } if (line.length() != kmerSize) { throw new MhapRuntimeException( "Kmers of valid kmers file have " + line.length() + ", but the expected was " + kmerSize + "Offending line: " + counter + ". With text: " + line); } if (line.matches("[^ATGCatgc]")) { throw new MhapRuntimeException("non ATGC character found at valid kmers file. Offending line: " + counter + ". With text: " + line); } counter++; } } } public static long computeHashYGS(String kmer) { String kmerBinary; kmerBinary = kmer.replaceAll("A", "00"); kmerBinary = kmerBinary.replaceAll("C", "01"); kmerBinary = kmerBinary.replaceAll("G", "10"); kmerBinary = kmerBinary.replaceAll("T", "11"); long val = Long.parseLong(kmerBinary, 2); //System.out.println(kmer + ", " + kmerBinary + ", " + val); return val; } public static String reverseComputeHashYGS(long index) { String binarySequence = Long.toBinaryString(index); if (binarySequence.length() < 32) binarySequence = completeZeros(binarySequence); String binaryBase; String seq = ""; for (int i = 0; i < 32; i = i + 2) { binaryBase = binarySequence.substring(i, i + 2); binaryBase = binaryBase.replaceAll("00", "A"); binaryBase = binaryBase.replaceAll("01", "C"); binaryBase = binaryBase.replaceAll("10", "G"); binaryBase = binaryBase.replaceAll("11", "T"); seq += binaryBase; } return seq; } private static String completeZeros(String kmer) { while (kmer.length() < 32) { kmer = "0" + kmer; } return kmer; } public static long[] computeHashYGSToArray(String seq, int kmerSize) { //HashFunction hf = Hashing.murmur3_128(seed); String subKmer; long[] hashes = new long[seq.length() - kmerSize + 1]; for (int iter = 0; iter < hashes.length; iter++) { //HashCode hc = hf.newHasher().putUnencodedChars(seq.substring(iter, iter + kmerSize)).hash(); //hashes[iter] = hc.asLong(); subKmer = seq.substring(iter, iter + kmerSize); if (!subKmer.contains("N")) { hashes[iter] = computeHashYGS(subKmer); } else { hashes[iter] = Integer.MAX_VALUE; } } return hashes; } public final static int[] errorString(int[] s, double readError) { int[] snew = s.clone(); Random generator = new Random(); for (int iter = 0; iter < s.length; iter++) { if (generator.nextDouble() < readError) while (snew[iter] == s[iter]) snew[iter] = generator.nextInt(3); } return snew; } public final static int[] generateInts(long seed, int numWords) { int[] hashes = new int[numWords]; long x = seed; for (int word = 0; word < numWords; word++) { // hashes[iter][word] = rand.nextInt(); // XORShift Random Number Generators x ^= (x << 21); x ^= (x >>> 35); x ^= (x << 4); hashes[word] = (int) x; } return hashes; } public final static long[] generateLongs(long seed, int numWords) { long[] hashes = new long[numWords]; long x = seed; for (int word = 0; word < numWords; word++) { // hashes[iter][word] = rand.nextInt(); // XORShift Random Number Generators x ^= (x << 21); x ^= (x >>> 35); x ^= (x << 4); hashes[word] = x; } return hashes; } public final static BufferedReader getFile(String fileName, String postfix) throws Exception { String[] array = new String[1]; array[0] = postfix; return getFile(fileName, array); } public final static BufferedReader getFile(String fileName, String[] postfix) throws IOException { BufferedReader bf = null; if (fileName.endsWith("bz2")) { // open file as a pipe System.err.println("Running command " + "bzip2 -dc " + new File(fileName).getAbsolutePath() + " |"); Process p = Runtime.getRuntime().exec("bzip2 -dc " + new File(fileName).getAbsolutePath() + " |"); bf = new BufferedReader(new InputStreamReader(p.getInputStream()), BUFFER_BYTE_SIZE); System.err.println(bf.ready()); } else if (fileName.endsWith("gz")) { // open file as a pipe System.err.println("Runnning comand " + "gzip -dc " + new File(fileName).getAbsolutePath() + " |"); Process p = Runtime.getRuntime().exec("gzip -dc " + new File(fileName).getAbsolutePath() + " |"); bf = new BufferedReader(new InputStreamReader(p.getInputStream()), BUFFER_BYTE_SIZE); System.err.println(bf.ready()); } else { int i = 0; for (i = 0; i < postfix.length; i++) { if (fileName.endsWith(postfix[i])) { bf = new BufferedReader(new FileReader(fileName), BUFFER_BYTE_SIZE); break; } } if (i == postfix.length) { System.err.println("Unknown file format " + fileName + " Skipping!"); } } return bf; } public final static String getID(String line) { String ids[] = line.split(":"); int commaPos = ids[1].indexOf(","); if (commaPos != -1) { return ids[1].substring(1, commaPos).trim(); } else { return ids[1]; } } public final static double getLetterPercentInRead(String fasta, String letter) { int ungappedLen = getUngappedRead(fasta).length(); int count = countLetterInRead(fasta, letter); return count / (double) ungappedLen; } public final static int getOvlSize(int readA, int readB, int ahang, int bhang) { if ((ahang <= 0 && bhang >= 0) || (ahang >= 0 && bhang <= 0)) { return -1; } if (ahang < 0) { return readA - Math.abs(bhang); } else { return readA - ahang; } } public final static int getRangeOverlap(int startA, int endA, int startB, int endB) { int minA = Math.min(startA, endA); int minB = Math.min(startB, endB); int maxA = Math.max(startA, endA); int maxB = Math.max(startB, endB); int start = Math.max(minA, minB); int end = Math.min(maxA, maxB); return (end - start + 1); } public final static String getUngappedRead(String fasta) { fasta = fasta.replaceAll("N", ""); fasta = fasta.replaceAll("-", ""); assert (fasta.length() >= 0); return fasta; } public final static String getValue(String line, String key) { if (line.startsWith(key)) { return line.split(":")[1]; } return null; } public final static <H> double hashEfficiency(HashMap<Integer, ArrayList<H>> c) { double e = hashEnthropy(c); double log2inv = 1.0 / Math.log(2); double scaling = Math.log(c.size()) * log2inv; return e / scaling; } public final static <H> double hashEnthropy(HashMap<Integer, ArrayList<H>> c) { double sum = 0.0; double log2inv = 1.0 / Math.log(2); double[] p = new double[c.size()]; int size = 0; int count = 0; for (ArrayList<H> elem : c.values()) { size += elem.size(); p[count++] = elem.size(); } for (int iter = 0; iter < p.length; iter++) { double val = p[iter] / (double) size; sum -= val * Math.log(val) * log2inv; } return sum; } public final static boolean isAContainedInB(int startA, int endA, int startB, int endB) { int minA = Math.min(startA, endA); int minB = Math.min(startB, endB); int maxA = Math.max(startA, endA); int maxB = Math.max(startB, endB); return (minB < minA && maxB > maxA); } public final static Pair<Double, Double> linearRegression(int[] a, int[] b, int size) { // take one pass and compute means int xy = 0; int x = 0; int y = 0; int x2 = 0; for (int iter = 0; iter < size; iter++) { xy += a[iter] * b[iter]; x += a[iter]; y += b[iter]; x2 += a[iter] * a[iter]; } double Ninv = 1.0 / (double) size; double beta = ((double) xy - Ninv * (double) (x * y)) / ((double) x2 - Ninv * (double) (x * x)); double alpha = Ninv * ((double) y - beta * (double) x); return new Pair<Double, Double>(alpha, beta); } public final static double mean(double[] a, int size) { double x = 0.0; for (int iter = 0; iter < size; iter++) x += a[iter]; return x / (double) size; } public final static double mean(int[] a, int size) { int x = 0; for (int iter = 0; iter < size; iter++) x += a[iter]; return x / (double) size; } public final static double pearsonCorr(int[] a, int[] b, int size) { if (size < 2) return 0.0; double meana = mean(a, size); double meanb = mean(b, size); double stda = std(a, size, meana); double stdb = std(b, size, meanb); double r = 0.0; for (int iter = 0; iter < size; iter++) { r += ((double) a[iter] - meana) * ((double) b[iter] - meanb) / (stda * stdb); } return r / (double) (size - 1); } // adapted form // http://blog.teamleadnet.com/2012/07/quick-select-algorithm-find-kth-element.html public final static int quickSelect(int[] array, int k, int length) { if (array == null || length <= k) return Integer.MAX_VALUE; int from = 0; int to = length - 1; // if from == to we reached the kth element while (from < to) { int r = from; int w = to; int mid = array[(r + w) / 2]; // stop if the reader and writer meets while (r < w) { if (array[r] >= mid) { // put the large values at the end int tmp = array[w]; array[w] = array[r]; array[r] = tmp; w--; } else { // the value is smaller than the pivot, skip r++; } } // if we stepped up (r++) we need to step one down if (array[r] > mid) r--; // the r pointer is on the end of the first k elements if (k <= r) { to = r; } else { from = r + 1; } } return array[k]; } public final static String rc(String supplied) { StringBuilder st = new StringBuilder(); for (int i = supplied.length() - 1; i >= 0; i--) { char theChar = supplied.charAt(i); if (theChar != '-') { Translate t = Translate.valueOf(Character.toString(theChar).toUpperCase()); st.append(t.getCompliment()); } else { st.append("-"); } } return st.toString(); } public final static double std(double[] a, int size, double mean) { double x = 0.0; for (int iter = 0; iter < size; iter++) { double val = a[iter] - mean; x += val * val; } return Math.sqrt(x / (double) (size - 1)); } public final static double std(int[] a, int size, double mean) { double x = 0.0; for (int iter = 0; iter < size; iter++) { double val = (double) a[iter] - mean; x += val * val; } return Math.sqrt(x / (double) (size - 1)); } public final static String toProtein(String genome, boolean isReversed, int frame) { StringBuilder result = new StringBuilder(); if (isReversed) { genome = rc(genome); } genome = genome.replaceAll("-", ""); for (int i = frame; i < (genome.length() - 3); i += 3) { String codon = genome.substring(i, i + 3); String protein = ToProtein.valueOf(codon).getProtein(); result.append(protein); } return result.toString(); } public static String toString(double[][] A) { StringBuilder s = new StringBuilder(); s.append("["); for (double[] a : A) { if (a != null) { for (int iter = 0; iter < a.length - 1; iter++) s.append("" + a[iter] + ","); if (a.length > 0) s.append("" + a[a.length - 1]); } s.append("\n"); } s.append("]"); return new String(s); } public static String toString(long[][] A) { StringBuilder s = new StringBuilder(); s.append("["); for (long[] a : A) { if (a != null) { for (int iter = 0; iter < a.length - 1; iter++) s.append("" + a[iter] + ","); if (a.length > 0) s.append("" + a[a.length - 1]); } s.append("\n"); } s.append("]"); return new String(s); } public static OpenBitSet getValidKmerHashes() { return validKmerHashes; } }