org.dishevelled.variation.vcf.VcfParser.java Source code

Java tutorial

Introduction

Here is the source code for org.dishevelled.variation.vcf.VcfParser.java

Source

/*
    
dsh-variation  Variation.
Copyright (c) 2013-2014 held jointly by the individual authors.
    
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
    
This library is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.
    
You should have received a copy of the GNU Lesser General Public License
along with this library;  if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA.
    
> http://www.fsf.org/licensing/licenses/lgpl.html
> http://www.opensource.org/licenses/lgpl-license.php
    
*/
package org.dishevelled.variation.vcf;

import static com.google.common.base.Preconditions.checkNotNull;

import static com.google.common.collect.Maps.newHashMap;

import java.io.Closeable;
import java.io.IOException;

import java.util.Map;

import com.google.common.collect.ImmutableMap;

import com.google.common.io.CharStreams;
import com.google.common.io.InputSupplier;
import com.google.common.io.LineProcessor;

/**
 * Low-level VCF parser.
 *
 * @author  Michael Heuer
 */
final class VcfParser {

    /**
     * Private no-arg constructor.
     */
    private VcfParser() {
        // empty
    }

    /**
     * Parse the specified input supplier.
     *
     * @param supplier input supplier, must not be null
     * @param listener low-level event based parser callback, must not be null
     * @throws IOException if an I/O error occurs
     */
    static <R extends Readable & Closeable> void parse(final InputSupplier<R> supplier,
            final VcfParseListener listener) throws IOException {
        checkNotNull(supplier);
        VcfLineProcessor lineProcessor = new VcfLineProcessor(listener);
        CharStreams.readLines(supplier, lineProcessor);
    }

    /**
     * VCF line processor.
     */
    private static final class VcfLineProcessor implements LineProcessor<Object> {
        /** Line number. */
        private long lineNumber = 0;

        /** VCF parse listener. */
        private final VcfParseListener listener;

        /** Map of sample names keyed by column. */
        private final Map<Integer, String> samples = newHashMap();

        /**
         * Create a new VCF line processor.
         *
         * @param listener VCF parse listener
         */
        private VcfLineProcessor(final VcfParseListener listener) {
            checkNotNull(listener);
            this.listener = listener;
        }

        @Override
        public Object getResult() {
            return null;
        }

        @Override
        public boolean processLine(final String line) throws IOException {
            lineNumber++;
            // consider using guava Splitter
            String[] tokens = line.split("\t");

            if (tokens[0].startsWith("##")) {
                // meta-information lines
                listener.meta(line);
            } else if (tokens[0].startsWith("#CHROM")) {
                // header line
                if (tokens.length > 8) {
                    for (int column = 9, columns = tokens.length; column < columns; column++) {
                        samples.put(column, tokens[column]);
                    }
                }
                listener.samples(samples.values().toArray(new String[0]));
            } else {
                // data lines
                listener.lineNumber(lineNumber);
                listener.chrom(tokens[0]);
                listener.pos(Long.parseLong(tokens[1]));
                listener.id(tokens[2].split(";"));
                listener.ref(tokens[3]);

                // todo: check for symbolic alleles
                listener.alt(tokens[4].split(","));

                listener.qual(Double.parseDouble(tokens[5]));
                listener.filter(tokens[6]);

                String[] infoTokens = tokens[7].split(";");
                ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
                for (String infoToken : infoTokens) {
                    String[] entryTokens = infoToken.split("=");
                    if (entryTokens.length == 2) {
                        builder.put(entryTokens[0], entryTokens[1]);
                    }
                }
                listener.info(builder.build());

                if (tokens.length > 8) {
                    for (int column = 9, columns = tokens.length; column < columns; column++) {
                        String[] sampleTokens = tokens[column].split(":");
                        // todo:  assumes GT exists and is first field
                        listener.gt(samples.get(column), sampleTokens[0]);
                    }
                }

                if (!listener.complete()) {
                    return false;
                }
            }
            return true;
        }
    }
}