VCF.VCF.java Source code

Java tutorial

Introduction

Here is the source code for VCF.VCF.java

Source

/*
 * This file is part of LinkImputeR.
 * 
 * LinkImputeR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * LinkImputeR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LinkImpute.  If not, see <http://www.gnu.org/licenses/>.
 */

package VCF;

import VCF.Changers.GenotypeChanger;
import VCF.Changers.PositionChanger;
import VCF.Exceptions.VCFDataException;
import VCF.Exceptions.VCFDataLineException;
import VCF.Exceptions.VCFException;
import VCF.Exceptions.VCFHeaderLineException;
import VCF.Exceptions.VCFInputException;
import VCF.Exceptions.VCFMissingFormatException;
import VCF.Filters.PositionFilter;
import VCF.Filters.SampleFilter;
import VCF.Mappers.ByteMapper;
import VCF.Mappers.DoubleMapper;
import VCF.Mappers.IntegerMapper;
import VCF.Mappers.Mapper;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.IntFunction;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.lang3.ArrayUtils;

/**
 * Represents the data from a VCF file
 * @author Daniel Money
 * @version 1.1.3
 */
public class VCF {

    /**
     * Constructor from a VCF file
     * @param f The VCF file
     * @throws VCF.Exceptions.VCFException If there is a problem with the VCF file
     * or the data in it.
     */
    public VCF(File f) throws VCFException {
        this(f, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new ArrayList<>());
    }

    /**
     * Constructor from a file, filtering positions at read time. Ensures
     * filtered positions are not stored in memory reducing memory usage when
     * reading a large VCF file with many positions that will be filtered
     * @param f The file
     * @param filters The position filters to apply
     * @throws VCF.Exceptions.VCFException If there is a problem with the VCF file
     * or the data in it.
     */
    public VCF(File f, List<PositionFilter> filters) throws VCFException {
        this(f, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), filters, new ArrayList<>());
    }

    /**
     * Constructor from a file, filtering positions at read time and changing
     * genotypes read in. By changing genotypes as they are read in any information
     * contained in the genotype field that will not be used can be discarded so
     * saving memory.
     * @param f The file
     * @param preFilters A list of filters to be applied before any changes are
     * applied (e.g. to filter out snps without the required data)
     * @param positionChangers List of changers to apply to the positions
     * @param genotypeChangers List of changers to apply to the genotypes
     * @param filters The position filters to apply (after the changers)
     * @param requiredFormats A list of formats required to be in the VCF
     * @throws VCF.Exceptions.VCFException If there is a problem with the VCF file
     * or the data in it
     */
    public VCF(File f, List<PositionFilter> preFilters, List<PositionChanger> positionChangers,
            List<GenotypeChanger> genotypeChangers, List<PositionFilter> filters, List<String> requiredFormats)
            throws VCFException {
        BufferedReader in;
        try {
            if (isGZipped(f)) {
                in = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
            } else {
                in = new BufferedReader(new FileReader(f));
            }
        } catch (FileNotFoundException e) {
            throw new VCFInputException("VCF file (" + f.getPath() + ") does not exist", e);
        } catch (IOException e) {
            throw new VCFInputException("Problem reading VCF file (" + f.getPath() + ")", e);
        }

        int lineNumber = 0;
        try {
            ArrayList<PositionMeta> positionList = new ArrayList<>();
            ArrayList<RawGenotype[]> genotypeList = new ArrayList<>();
            ArrayList<String> metaLines = new ArrayList<>();
            String line;
            while ((line = in.readLine()) != null) {
                lineNumber++;
                if (line.startsWith("##")) {
                    metaLines.add(line);
                } else if (line.startsWith("#")) {
                    //Process meta data since we've read it all in
                    meta = new Meta(metaLines);
                    boolean allFormats = true;
                    StringBuilder missingString = new StringBuilder();
                    for (String format : requiredFormats) {
                        if (!meta.hasFormat(format)) {
                            allFormats = false;
                            missingString.append(" ");
                            missingString.append(format);
                        }
                    }
                    if (!allFormats) {
                        throw new VCFMissingFormatException(
                                "The VCF is missing the following required formats" + missingString.toString());
                    }

                    //Now deal with the header line
                    String[] parts = line.split("\t");
                    try {
                        samples = Arrays.copyOfRange(parts, 9, parts.length);
                    } catch (ArrayIndexOutOfBoundsException | IllegalArgumentException ex) {
                        throw new VCFHeaderLineException(
                                "Not enough fields in header line (line number " + lineNumber + ")");
                    }
                } else {
                    if (samples == null) {
                        throw new VCFHeaderLineException("Data lines occur before header line");
                    }
                    String[] parts = line.split("\t");

                    if (parts.length < samples.length + 9) {
                        throw new VCFDataLineException(
                                "Not enough fields in data line (line number " + lineNumber + ")");
                    }
                    if (parts.length > samples.length + 9) {
                        throw new VCFDataLineException(
                                "Too many fields in data line (line number " + lineNumber + ")");
                    }

                    String[] metaArray = Arrays.copyOfRange(parts, 0, 9);
                    PositionMeta pm = new PositionMeta(metaArray);

                    RawGenotype[] data = new RawGenotype[parts.length - 9];
                    for (int i = 0; i < data.length; i++) {
                        data[i] = new RawGenotype(parts[i + 9]);
                    }

                    Position p = new Position(pm, samples, data);

                    boolean preallmatch = true;
                    for (PositionFilter filter : filters) {
                        preallmatch = preallmatch && filter.test(p);
                    }
                    if (preallmatch) {

                        for (PositionChanger c : positionChangers) {
                            c.change(p);
                        }

                        for (Genotype g : p.genotypeList()) {
                            for (GenotypeChanger c : genotypeChangers) {
                                c.change(g);
                            }
                        }

                        boolean allmatch = true;
                        for (PositionFilter filter : filters) {
                            allmatch = allmatch && filter.test(p);
                        }
                        if (allmatch) {
                            positionList.add(pm);
                            genotypeList.add(data);
                        }
                    }
                }
            }

            positions = positionList.toArray(new PositionMeta[positionList.size()]);
            genotypes = genotypeList.toArray(new RawGenotype[genotypeList.size()][]);

            pVis = new boolean[positions.length];
            Arrays.fill(pVis, true);

            if (samples == null) {
                throw new VCFHeaderLineException("No data line line in VCF");
            }
            sVis = new boolean[samples.length];
            Arrays.fill(sVis, true);
        } catch (IOException e) {
            if (lineNumber == 0) {
                throw new VCFInputException("Problem reading VCF", e);
            } else {
                throw new VCFInputException("Problem reading VCF (line number " + lineNumber + ")", e);
            }
        }
    }

    /**
     * Create a  VCF object from data rather than a file
     * @param meta The meta data for the VCF
     * @param positions The positions for the VCF (which includes information
     * on samples and genotypes).
     * @throws VCFDataException If there is a problem with the passed in list of positions
     */
    public VCF(Meta meta, List<Position> positions) throws VCFDataException {
        this.meta = meta;
        samples = null;
        ArrayList<PositionMeta> positionsList = new ArrayList<>();
        ArrayList<RawGenotype[]> genotypeList = new ArrayList<>();
        for (Position p : positions) {
            if (samples == null) {
                samples = p.samples();
            } else {
                if (!Arrays.equals(samples, p.samples())) {
                    throw new VCFDataException("A position does not have the same samples as a previous position");
                }
            }
            positionsList.add(p.meta());
            genotypeList.add(p.getRawGenotypes());
        }

        this.positions = positionsList.toArray(new PositionMeta[positionsList.size()]);
        genotypes = genotypeList.toArray(new RawGenotype[genotypeList.size()][]);

        pVis = new boolean[this.positions.length];
        Arrays.fill(pVis, true);

        if (samples == null) {
            throw new VCFDataException("No positions provided from which to initalise samples");
        }
        sVis = new boolean[samples.length];
        Arrays.fill(sVis, true);
    }

    /**
     * Gets a stream of genotypes.  Genotypes are returned by position
     * i.e. the genotypes for one position are returned before moving onto the
     * next position.
     * @return The stream
     */
    public Stream<Genotype> genotypeStream() {
        return genotypesByPositionStream();
    }

    /**
     * Gets a stream of genotypes by position.
     * That is the genotypes for one position are returned before moving onto the
     * next position.
     * @return The stream
     */
    public Stream<Genotype> genotypesByPositionStream() {
        return positionStream().flatMap(p -> p.genotypeStream());
    }

    /**
     * Gets a stream of genotypes by sample.
     * That is the genotypes for one sample are returned before moving onto the
     * next sample.
     * @return The stream
     */
    public Stream<Genotype> genotypesBySampleStream() {
        return sampleStream().flatMap(s -> s.genotypeStream());
    }

    /**
     * Returns the data for a single position
     * @param position The position meta data to return the data for
     * @return The position data
     */
    public Position singlePosition(PositionMeta position) {
        return singlePosition(ArrayUtils.indexOf(positions, position));
    }

    private Position singlePosition(int i) {
        return new Position(positions[i], samples, sVis, genotypes[i]);
    }

    /**
     * Returns the data for a single sample
     * @param sample The string representing the sample to return the data for
     * @return The position data
     */
    public Sample singleSample(String sample) {
        return singleSample(ArrayUtils.indexOf(samples, sample));
    }

    private Sample singleSample(int i) {
        return new Sample(samples[i], positions, pVis, Arrays.stream(genotypes).map(genotype -> genotype[i])
                .toArray((IntFunction<RawGenotype[]>) RawGenotype[]::new));
    }

    /**
     * Returns a stream of positions in the VCF
     * @return The stream
     */
    public Stream<Position> positionStream() {
        return IntStream.range(0, genotypes.length).filter(i -> pVis[i])
                .mapToObj(i -> new Position(positions[i], samples, sVis, genotypes[i]));
    }

    /**
     * Returns a stream of samples in the VCF
     * @return The stream
     */
    public Stream<Sample> sampleStream() {
        return IntStream.range(0, samples.length).filter(i -> sVis[i])
                .mapToObj(i -> new Sample(samples[i], positions, pVis, Arrays.stream(genotypes)
                        .map(genotype -> genotype[i]).toArray((IntFunction<RawGenotype[]>) RawGenotype[]::new)));
    }

    /**
     * Get the meta information for this VCF
     * @return The meta information
     */
    public Meta getMeta() {
        return meta;
    }

    /**
     * Filter the samples based on the given filter.  Samples are merely hidden,
     * not deleted, as they can then be unhidden (see resetVisible) which makes
     * applying different filters to the same VCF much easier.
     * @param filter The sample filter to be applied.
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public void filterSamples(SampleFilter filter) throws VCFDataException {
        for (int i = 0; i < samples.length; i++) {
            sVis[i] = sVis[i] && filter.test(singleSample(i));
        }
    }

    /**
     * Filter the positions based on the given filter.  Positions are merely hidden,
     * not deleted, as they can then be unhidden (see resetVisible) which makes
     * applying different filters to the same VCF much easier.
     * @param filter The position filter to be applied.
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public void filterPositions(PositionFilter filter) throws VCFDataException {
        for (int i = 0; i < positions.length; i++) {
            pVis[i] = pVis[i] && filter.test(singlePosition(i));
        }
    }

    /**
     * Limits the VCF to the given position.  Again positions are hidden, not
     * deleted.
     * @param keep The positions to keep.
     */
    public void limitToPositions(List<PositionMeta> keep) {
        for (int oi = 0; oi < positions.length; oi++) {
            if (!keep.contains(positions[oi])) {
                pVis[oi] = false;
            }
        }
    }

    /**
     * Limits the VCF to the given samples.  Again samples are hidden, not
     * deleted.
     * @param keep The positions to keep.
     */
    public void limitToSamples(List<String> keep) {
        for (int oi = 0; oi < samples.length; oi++) {
            if (!keep.contains(samples[oi])) {
                sVis[oi] = false;
            }
        }
    }

    /**
     * Returns the number of (visible) positions in the VCF
     * @return The number of positions
     */
    public int numberPositions() {
        return (int) IntStream.range(0, pVis.length).filter(i -> pVis[i]).count();
    }

    /**
     * Returns the number of (visible) samples in the VCF
     * @return The number of samples
     */
    public int numberSamples() {
        //return samples.length;
        return (int) IntStream.range(0, sVis.length).filter(i -> sVis[i]).count();
    }

    /**
     * Writes the VCF to a file. Only visible samples / positions are written.
     * @param f The file to write to
     * @throws IOException If there is an IO problem
     */
    public void writeFile(File f) throws IOException {
        PrintWriter out;
        if (f.getName().endsWith(".gz")) {
            out = new PrintWriter(new OutputStreamWriter(
                    new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(f)))));
        } else {
            out = new PrintWriter(new BufferedWriter(new FileWriter(f)));
        }
        meta.getLinesStream().forEach(out::println);

        out.print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        Arrays.stream(getSamples()).forEach(s -> out.print("\t" + s));
        out.println();

        positionStream().forEach(p -> out.println(p.toText()));
        out.close();
    }

    /**
     * Gets data from the genotypes in the VCF as an array
     * @param <V> The type of data returned
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to the
     * required type
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public <V> V[][] asArray(String format, Mapper<V> mapper) throws VCFDataException {
        V[][] array = mapper.get2DArray(numberPositions());

        int cp = 0;
        for (int i = 0; i < genotypes.length; i++) {
            if (pVis[i]) {
                int cs = 0;
                V[] a = mapper.getArray(numberSamples());
                for (int j = 0; j < genotypes[i].length; j++) {
                    if (sVis[i]) {
                        a[cs] = mapper.map(new Genotype(genotypes[i][j], positions[i], samples[j]).getData(format));
                        cs++;
                    }
                }
                array[cp] = a;
                cp++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as a transposed array
     * @param <V> The type of data returned
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to the
     * required type
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public <V> V[][] asArrayTransposed(String format, Mapper<V> mapper) throws VCFDataException {
        V[][] array = mapper.get2DArray(numberSamples());

        int cs = 0;
        for (int i = 0; i < genotypes[0].length; i++) {
            if (sVis[i]) {
                int cp = 0;
                V[] a = mapper.getArray(numberPositions());
                for (int j = 0; j < genotypes.length; j++) {
                    if (pVis[j]) {
                        a[cp] = mapper.map(new Genotype(genotypes[j][i], positions[j], samples[i]).getData(format));
                        cp++;
                    }
                }
                array[cs] = a;
                cs++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as an integer array
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to an integer
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public int[][] asIntegerArray(String format, IntegerMapper mapper) throws VCFDataException {
        int[][] array = new int[numberPositions()][];

        int cp = 0;
        for (int i = 0; i < genotypes.length; i++) {
            if (pVis[i]) {
                int cs = 0;
                int[] a = new int[numberSamples()];
                for (int j = 0; j < genotypes[i].length; j++) {
                    if (sVis[i]) {
                        a[cs] = mapper.map(new Genotype(genotypes[i][j], positions[i], samples[j]).getData(format));
                        cs++;
                    }
                }
                array[cp] = a;
                cp++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as a transposed integer array
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to an integer
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public int[][] asIntegerArrayTransposed(String format, IntegerMapper mapper) throws VCFDataException {
        int[][] array = new int[numberSamples()][];

        int cs = 0;
        for (int i = 0; i < genotypes[0].length; i++) {
            if (sVis[i]) {
                int cp = 0;
                int[] a = new int[numberPositions()];
                for (int j = 0; j < genotypes.length; j++) {
                    if (pVis[j]) {
                        a[cp] = mapper.map(new Genotype(genotypes[j][i], positions[j], samples[i]).getData(format));
                        cp++;
                    }
                }
                array[cs] = a;
                cs++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as a double array
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to a double
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public double[][] asDoubleArray(String format, DoubleMapper mapper) throws VCFDataException {
        double[][] array = new double[numberPositions()][];

        int cp = 0;
        for (int i = 0; i < genotypes.length; i++) {
            if (pVis[i]) {
                int cs = 0;
                double[] a = new double[numberSamples()];
                for (int j = 0; j < genotypes[i].length; j++) {
                    if (sVis[i]) {
                        a[cs] = mapper.map(new Genotype(genotypes[i][j], positions[i], samples[j]).getData(format));
                        cs++;
                    }
                }
                array[cp] = a;
                cp++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as a transposed double array
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to a double
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public double[][] asDoubleArrayTransposed(String format, DoubleMapper mapper) throws VCFDataException {
        double[][] array = new double[numberSamples()][];

        int cs = 0;
        for (int i = 0; i < genotypes[0].length; i++) {
            if (sVis[i]) {
                int cp = 0;
                double[] a = new double[numberPositions()];
                for (int j = 0; j < genotypes.length; j++) {
                    if (pVis[j]) {
                        a[cp] = mapper.map(new Genotype(genotypes[j][i], positions[j], samples[i]).getData(format));
                        cp++;
                    }
                }
                array[cs] = a;
                cs++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as a byte array
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to a byte
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public byte[][] asByteArray(String format, ByteMapper mapper) throws VCFDataException {
        byte[][] array = new byte[numberPositions()][];

        int cp = 0;
        for (int i = 0; i < genotypes.length; i++) {
            if (pVis[i]) {
                int cs = 0;
                byte[] a = new byte[numberSamples()];
                for (int j = 0; j < genotypes[i].length; j++) {
                    if (sVis[i]) {
                        a[cs] = mapper.map(new Genotype(genotypes[i][j], positions[i], samples[j]).getData(format));
                        cs++;
                    }
                }
                array[cp] = a;
                cp++;
            }
        }

        return array;
    }

    /**
     * Gets data from the genotypes in the VCF as a transposed byte array
     * @param format The format in the genotype data to get the data from
     * @param mapper A mapper mapping from the string (in the VCF) to a byte
     * @return The array
     * @throws VCF.Exceptions.VCFDataException If there is a problem with the
     * data in the VCF
     */
    public byte[][] asByteArrayTransposed(String format, ByteMapper mapper) throws VCFDataException {
        byte[][] array = new byte[numberSamples()][];

        int cs = 0;
        for (int i = 0; i < genotypes[0].length; i++) {
            if (sVis[i]) {
                int cp = 0;
                byte[] a = new byte[numberPositions()];
                for (int j = 0; j < genotypes.length; j++) {
                    if (pVis[j]) {
                        a[cp] = mapper.map(new Genotype(genotypes[j][i], positions[j], samples[i]).getData(format));
                        cp++;
                    }
                }
                array[cs] = a;
                cs++;
            }
        }

        return array;
    }

    /**
     * Get a stream of sample names
     * @return The stream
     */
    public String[] getSamples() {
        //return samples;
        return IntStream.range(0, samples.length).filter(i -> sVis[i]).mapToObj(i -> samples[i])
                .toArray(String[]::new);
    }

    /**
     * Get a stream of position meta data
     * @return The stream
     */
    public PositionMeta[] getPositions() {
        //return positions;
        return IntStream.range(0, positions.length).filter(i -> pVis[i]).mapToObj(i -> positions[i])
                .toArray((IntFunction<PositionMeta[]>) PositionMeta[]::new);
    }

    /**
     * Resets all samples and positions to be visible, that is the state
     * immediately after the VCF was constructed.
     */
    public void resetVisible() {
        Arrays.fill(pVis, true);
        Arrays.fill(sVis, true);
    }

    private Meta meta;
    private RawGenotype[][] genotypes;
    private PositionMeta[] positions;
    private String[] samples;
    private boolean[] pVis;
    private boolean[] sVis;

    /**
     * Utility function that returns the number of positions in a file
     * without reading in any data
     * @param f The VCF file
     * @return The number of positions
     * @throws VCF.Exceptions.VCFInputException If there is a problem with reading
     * the VCF
     */
    public static int numberPositionsFromFile(File f) throws VCFInputException {
        BufferedReader in;
        try {
            if (isGZipped(f)) {
                in = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
            } else {
                in = new BufferedReader(new FileReader(f));
            }

            int lines = 0;
            String line;
            while ((line = in.readLine()) != null) {
                if (!line.startsWith("#")) {
                    lines++;
                }
            }
            in.close();
            return lines;
        } catch (FileNotFoundException e) {
            throw new VCFInputException("VCF file (" + f.getPath() + ") does not exist", e);
        } catch (IOException e) {
            throw new VCFInputException("Problem reading VCF file (" + f.getPath() + ")", e);
        }
    }

    /**
     * Utility function that returns the number of samples in a file
     * without reading in any data
     * @param f The VCF file
     * @return The number of samples
     * @throws IOException If there is an IO problem
     */
    public static int numberSamplesFromFile(File f) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(f));
        String line;
        while (!(line = reader.readLine()).matches("^#[^#].*"))
            ;
        return (line.split("\t").length - 9);
    }

    // Can't believe there's not a better way to do this but google suggest not
    // From https://stackoverflow.com/questions/30507653
    private static boolean isGZipped(File f) throws IOException {
        int magic;
        RandomAccessFile raf = new RandomAccessFile(f, "r");
        magic = raf.read() & 0xff | ((raf.read() << 8) & 0xff00);
        raf.close();
        return magic == GZIPInputStream.GZIP_MAGIC;
    }
}