com.cloudera.science.quince.SampleUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.science.quince.SampleUtils.java

Source

/*
 * Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */

package com.cloudera.science.quince;

import htsjdk.variant.vcf.VCFHeader;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.opencb.hpg.bigdata.core.converters.FullVcfCodec;
import org.opencb.hpg.bigdata.core.io.VcfBlockIterator;

/**
 * Static helpers for discovering the sample names declared in VCF files.
 */
public final class SampleUtils {

    private SampleUtils() {
        // Utility class; not meant to be instantiated.
    }

    /**
     * Collects the distinct genotype sample names declared in the headers of the given
     * VCF files.
     *
     * <p>The files are visited in array order and the result is a {@link LinkedHashSet},
     * so each name keeps the position at which it was first added; a name that appears
     * in more than one header is only included once.
     *
     * @param conf the Hadoop configuration used to resolve the file system of each path
     * @param vcfs the VCF files whose headers should be read
     * @return the distinct sample names, in first-insertion order
     * @throws IOException if opening, reading, or closing any of the files fails
     */
    public static Set<String> uniqueSamples(Configuration conf, Path[] vcfs) throws IOException {
        Set<String> samples = new LinkedHashSet<>();
        for (Path vcf : vcfs) {
            // Only the header is needed from each file. The sample names are copied into
            // the set before the stream is closed, so releasing it here is safe and
            // avoids leaking one open file-system stream per input path.
            try (InputStream inputStream = vcf.getFileSystem(conf).open(vcf)) {
                VcfBlockIterator iterator = new VcfBlockIterator(inputStream, new FullVcfCodec());
                VCFHeader header = iterator.getHeader();
                samples.addAll(header.getGenotypeSamples());
            }
        }
        return samples;
    }
}