Example usage for org.springframework.data.mongodb.core MongoTemplate getCollection

Introduction

In this page you can find the example usage for org.springframework.data.mongodb.core MongoTemplate getCollection.

Prototype

@SuppressWarnings("ConstantConditions")
    public MongoCollection<Document> getCollection(final String collectionName)

Source Link

Usage

From source file:com.avanza.ymer.YmerInitialLoadIntegrationTest.java

@Test
public void migratesOldDocumentOnInitialLoad() throws Exception {
    BasicDBObject spaceObjectV1 = new BasicDBObject();
    spaceObjectV1.put("_id", "id_v1");
    spaceObjectV1.put("message", "Msg_V1");

    BasicDBObject spaceObjectV2 = new BasicDBObject();
    spaceObjectV2.put("_id", "id_v2");
    spaceObjectV2.put("message", "Msg_V2");
    spaceObjectV2.put(MirroredObject.DOCUMENT_FORMAT_VERSION_PROPERTY, 2);

    BasicDBObject spaceOtherObject = new BasicDBObject();
    spaceOtherObject.put("_id", "otherId");
    spaceOtherObject.put("message", "Msg_V1");

    MongoTemplate mongoTemplate = mirrorEnv.getMongoTemplate();
    mongoTemplate.getCollection(mirroredObject.getCollectionName()).insert(spaceObjectV1);
    mongoTemplate.getCollection(mirroredObject.getCollectionName()).insert(spaceObjectV2);
    mongoTemplate.getCollection(mirroredOtherDocument.getCollectionName()).insert(spaceOtherObject);

    pu.start();//  w ww.  j  av a  2  s . c  om

    // Verify SpaceObject
    GigaSpace gigaSpace = pu.getClusteredGigaSpace();
    assertEquals(2, gigaSpace.count(new TestSpaceObject()));
    TestSpaceObject testSpaceObject = gigaSpace.readById(TestSpaceObject.class, "id_v1");
    assertEquals("patched_Msg_V1", testSpaceObject.getMessage());

    List<BasicDBObject> allDocs = mongoTemplate.findAll(BasicDBObject.class,
            mirroredObject.getCollectionName());
    assertEquals(2, allDocs.size());
    assertEquals(2, mirroredObject.getDocumentVersion(allDocs.get(0)));
    assertEquals(2, mirroredObject.getDocumentVersion(allDocs.get(1)));

    // Verify SpaceOtherObject
    assertEquals(1, gigaSpace.count(new TestSpaceOtherObject()));
    TestSpaceOtherObject testSpaceOtherObject = gigaSpace.readById(TestSpaceOtherObject.class, "otherId");
    assertEquals("patched_Msg_V1", testSpaceOtherObject.getMessage());

    List<BasicDBObject> allOtherDocs = mongoTemplate.findAll(BasicDBObject.class,
            mirroredOtherDocument.getCollectionName());
    assertEquals(1, allOtherDocs.size());
    assertEquals(1, mirroredOtherDocument.getDocumentVersion(allOtherDocs.get(0)));
}

From source file:com.skymobi.monitor.service.LogsService.java

public DBCursor findLogs(String projectName, LogQuery logQuery, int max) throws ParseException {
    Project project = projectService.findProject(projectName);
    MongoTemplate template = project.fetchMongoTemplate();

    Query query = new BasicQuery(logQuery.toQuery());
    query.limit(max);//from w  w  w  .  ja v a 2s. com

    query.sort().on("timestamp", Order.DESCENDING);
    logger.debug("find logs from {}  by query {} by sort {}",
            new Object[] { project.getLogCollection(), query.getQueryObject(), query.getSortObject() });
    DBCursor cursor = template.getCollection(project.getLogCollection()).find(query.getQueryObject())
            .sort(query.getSortObject()).limit(max);
    return cursor;
}

From source file:com.appleframework.monitor.service.LogsService.java

public DBCursor findLogs(String projectName, LogQuery logQuery, int max) throws ParseException {
    Project project = projectService.findProject(projectName);
    MongoTemplate template = project.fetchMongoTemplate();

    Query query = new BasicQuery(logQuery.toQuery());
    query.limit(max);//  w  w w.j  ava2s. c om

    //query.sort().on("timestamp", Order.DESCENDING);
    query.with(new Sort(Direction.DESC, "timestamp"));
    logger.debug("find logs from {}  by query {} by sort {}",
            new Object[] { project.getLogCollection(), query.getQueryObject(), query.getSortObject() });
    DBCursor cursor = template.getCollection(project.getLogCollection()).find(query.getQueryObject())
            .sort(query.getSortObject()).limit(max);
    return cursor;
}

From source file:fr.cirad.mgdb.exporting.markeroriented.EigenstratExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, List<SampleId> sampleIDs,
        ProgressIndicator progress, DBCursor markerCursor, Map<Comparable, Comparable> markerSynonyms,
        int nMinimumGenotypeQuality, int nMinimumReadDepth, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    // long before = System.currentTimeMillis();

    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);
    File snpFile = null;/* w w w  .  java 2 s  .  com*/

    try {
        snpFile = File.createTempFile("snpFile", "");
        FileWriter snpFileWriter = new FileWriter(snpFile);

        ZipOutputStream zos = new ZipOutputStream(outputStream);
        if (ByteArrayOutputStream.class.isAssignableFrom(outputStream.getClass()))
            zos.setLevel(ZipOutputStream.STORED);

        if (readyToExportFiles != null)
            for (String readyToExportFile : readyToExportFiles.keySet()) {
                zos.putNextEntry(new ZipEntry(readyToExportFile));
                InputStream inputStream = readyToExportFiles.get(readyToExportFile);
                byte[] dataBlock = new byte[1024];
                int count = inputStream.read(dataBlock, 0, 1024);
                while (count != -1) {
                    zos.write(dataBlock, 0, count);
                    count = inputStream.read(dataBlock, 0, 1024);
                }
            }

        MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
        int markerCount = markerCursor.count();

        List<Individual> individuals = getIndividualsFromSamples(sModule, sampleIDs);

        ArrayList<String> individualList = new ArrayList<String>();
        StringBuffer indFileContents = new StringBuffer();

        for (int i = 0; i < sampleIDs.size(); i++) {
            Individual individual = individuals.get(i);
            if (!individualList.contains(individual.getId())) {
                individualList.add(individual.getId());
                indFileContents
                        .append(individual.getId() + "\t" + getIndividualGenderCode(sModule, individual.getId())
                                + "\t" + (individual.getPopulation() == null ? "." : individual.getPopulation())
                                + LINE_SEPARATOR);
            }
        }

        String exportName = sModule + "_" + markerCount + "variants_" + individualList.size() + "individuals";
        zos.putNextEntry(new ZipEntry(exportName + ".ind"));
        zos.write(indFileContents.toString().getBytes());

        zos.putNextEntry(new ZipEntry(exportName + ".eigenstratgeno"));

        int avgObjSize = (Integer) mongoTemplate
                .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats()
                .get("avgObjSize");
        int nChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;
        short nProgress = 0, nPreviousProgress = 0;
        long nLoadedMarkerCount = 0;

        while (markerCursor.hasNext()) {
            int nLoadedMarkerCountInLoop = 0;
            Map<Comparable, String> markerChromosomalPositions = new LinkedHashMap<Comparable, String>();
            boolean fStartingNewChunk = true;
            markerCursor.batchSize(nChunkSize);
            while (markerCursor.hasNext()
                    && (fStartingNewChunk || nLoadedMarkerCountInLoop % nChunkSize != 0)) {
                DBObject exportVariant = markerCursor.next();
                DBObject refPos = (DBObject) exportVariant.get(VariantData.FIELDNAME_REFERENCE_POSITION);
                markerChromosomalPositions.put((Comparable) exportVariant.get("_id"),
                        refPos.get(ReferencePosition.FIELDNAME_SEQUENCE) + ":"
                                + refPos.get(ReferencePosition.FIELDNAME_START_SITE));
                nLoadedMarkerCountInLoop++;
                fStartingNewChunk = false;
            }

            List<Comparable> currentMarkers = new ArrayList<Comparable>(markerChromosomalPositions.keySet());
            LinkedHashMap<VariantData, Collection<VariantRunData>> variantsAndRuns = MgdbDao.getSampleGenotypes(
                    mongoTemplate, sampleIDs, currentMarkers, true,
                    null /*new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_SEQUENCE).and(new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_START_SITE))*/); // query mongo db for matching genotypes
            for (VariantData variant : variantsAndRuns.keySet()) // read data and write results into temporary files (one per sample)
            {
                Comparable variantId = variant.getId();

                List<String> chromAndPos = Helper.split(markerChromosomalPositions.get(variantId), ":");
                if (chromAndPos.size() == 0)
                    LOG.warn("Chromosomal position not found for marker " + variantId);
                // LOG.debug(marker + "\t" + (chromAndPos.length == 0 ? "0" : chromAndPos[0]) + "\t" + 0 + "\t" + (chromAndPos.length == 0 ? 0l : Long.parseLong(chromAndPos[1])) + LINE_SEPARATOR);
                if (markerSynonyms != null) {
                    Comparable syn = markerSynonyms.get(variantId);
                    if (syn != null)
                        variantId = syn;
                }
                snpFileWriter.write(variantId + "\t" + (chromAndPos.size() == 0 ? "0" : chromAndPos.get(0))
                        + "\t" + 0 + "\t" + (chromAndPos.size() == 0 ? 0l : Long.parseLong(chromAndPos.get(1)))
                        + LINE_SEPARATOR);

                Map<String, List<String>> individualGenotypes = new LinkedHashMap<String, List<String>>();
                Collection<VariantRunData> runs = variantsAndRuns.get(variant);
                if (runs != null)
                    for (VariantRunData run : runs)
                        for (Integer sampleIndex : run.getSampleGenotypes().keySet()) {
                            SampleGenotype sampleGenotype = run.getSampleGenotypes().get(sampleIndex);
                            String individualId = individuals
                                    .get(sampleIDs
                                            .indexOf(new SampleId(run.getId().getProjectId(), sampleIndex)))
                                    .getId();

                            Integer gq = null;
                            try {
                                gq = (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_GQ);
                            } catch (Exception ignored) {
                            }
                            if (gq != null && gq < nMinimumGenotypeQuality)
                                continue;

                            Integer dp = null;
                            try {
                                dp = (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_DP);
                            } catch (Exception ignored) {
                            }
                            if (dp != null && dp < nMinimumReadDepth)
                                continue;

                            String gtCode = sampleGenotype.getCode();
                            List<String> storedIndividualGenotypes = individualGenotypes.get(individualId);
                            if (storedIndividualGenotypes == null) {
                                storedIndividualGenotypes = new ArrayList<String>();
                                individualGenotypes.put(individualId, storedIndividualGenotypes);
                            }
                            storedIndividualGenotypes.add(gtCode);
                        }

                for (int j = 0; j < individualList
                        .size(); j++ /* we use this list because it has the proper ordering*/) {
                    String individualId = individualList.get(j);
                    List<String> genotypes = individualGenotypes.get(individualId);
                    HashMap<Object, Integer> genotypeCounts = new HashMap<Object, Integer>(); // will help us to keep track of missing genotypes
                    int highestGenotypeCount = 0;
                    String mostFrequentGenotype = null;
                    if (genotypes != null)
                        for (String genotype : genotypes) {
                            if (genotype.length() == 0)
                                continue; /* skip missing genotypes */

                            int gtCount = 1 + MgdbDao.getCountForKey(genotypeCounts, genotype);
                            if (gtCount > highestGenotypeCount) {
                                highestGenotypeCount = gtCount;
                                mostFrequentGenotype = genotype;
                            }
                            genotypeCounts.put(genotype, gtCount);
                        }

                    List<String> alleles = mostFrequentGenotype == null ? new ArrayList<String>()
                            : variant.getAllelesFromGenotypeCode(mostFrequentGenotype);

                    int nOutputCode = 0;
                    if (mostFrequentGenotype == null)
                        nOutputCode = 9;
                    else
                        for (String all : Helper.split(mostFrequentGenotype, "/"))
                            if ("0".equals(all))
                                nOutputCode++;
                    if (j == 0 && variant.getKnownAlleleList().size() > 2)
                        warningFileWriter.write("- Variant " + variant.getId()
                                + " is multi-allelic. Make sure Eigenstrat genotype encoding specifications are suitable for you.\n");
                    zos.write(("" + nOutputCode).getBytes());

                    if (genotypeCounts.size() > 1 || alleles.size() > 2) {
                        if (genotypeCounts.size() > 1)
                            warningFileWriter.write("- Dissimilar genotypes found for variant "
                                    + (variantId == null ? variant.getId() : variantId) + ", individual "
                                    + individualId + ". Exporting most frequent: " + nOutputCode + "\n");
                        if (alleles.size() > 2)
                            warningFileWriter.write("- More than 2 alleles found for variant "
                                    + (variantId == null ? variant.getId() : variantId) + ", individual "
                                    + individualId + ". Exporting only the first 2 alleles.\n");
                    }
                }
                zos.write((LINE_SEPARATOR).getBytes());
            }

            if (progress.hasAborted())
                return;

            nLoadedMarkerCount += nLoadedMarkerCountInLoop;
            nProgress = (short) (nLoadedMarkerCount * 100 / markerCount);
            if (nProgress > nPreviousProgress) {
                // if (nProgress%5 == 0)
                //    LOG.info("============= exportData: " + nProgress + "% =============" + (System.currentTimeMillis() - before)/1000 + "s");
                progress.setCurrentStepProgress(nProgress);
                nPreviousProgress = nProgress;
            }
        }

        snpFileWriter.close();
        zos.putNextEntry(new ZipEntry(exportName + ".snp"));
        BufferedReader in = new BufferedReader(new FileReader(snpFile));
        String sLine;
        while ((sLine = in.readLine()) != null)
            zos.write((sLine + "\n").getBytes());
        in.close();

        warningFileWriter.close();
        if (warningFile.length() > 0) {
            zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
            int nWarningCount = 0;
            in = new BufferedReader(new FileReader(warningFile));
            while ((sLine = in.readLine()) != null) {
                zos.write((sLine + "\n").getBytes());
                nWarningCount++;
            }
            LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
            in.close();
        }
        warningFile.delete();

        zos.close();
        progress.setCurrentStepProgress((short) 100);
    } finally {
        if (snpFile != null && snpFile.exists())
            snpFile.delete();
    }
}

From source file:fr.cirad.mgdb.exporting.markeroriented.VcfExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, List<SampleId> sampleIDs,
        ProgressIndicator progress, DBCursor markerCursor, Map<Comparable, Comparable> markerSynonyms,
        int nMinimumGenotypeQuality, int nMinimumReadDepth, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    Integer projectId = null;//  w w w  .j  av  a2s .co  m
    for (SampleId spId : sampleIDs) {
        if (projectId == null)
            projectId = spId.getProject();
        else if (projectId != spId.getProject()) {
            projectId = 0;
            break; // more than one project are involved: no header will be written
        }
    }

    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);

    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    int markerCount = markerCursor.count();
    ZipOutputStream zos = new ZipOutputStream(outputStream);

    if (readyToExportFiles != null)
        for (String readyToExportFile : readyToExportFiles.keySet()) {
            zos.putNextEntry(new ZipEntry(readyToExportFile));
            InputStream inputStream = readyToExportFiles.get(readyToExportFile);
            byte[] dataBlock = new byte[1024];
            int count = inputStream.read(dataBlock, 0, 1024);
            while (count != -1) {
                zos.write(dataBlock, 0, count);
                count = inputStream.read(dataBlock, 0, 1024);
            }
        }

    LinkedHashMap<SampleId, String> sampleIDToIndividualIdMap = new LinkedHashMap<SampleId, String>();
    ArrayList<String> individualList = new ArrayList<String>();
    List<Individual> individuals = getIndividualsFromSamples(sModule, sampleIDs);
    for (int i = 0; i < sampleIDs.size(); i++) {
        String individualId = individuals.get(i).getId();
        sampleIDToIndividualIdMap.put(sampleIDs.get(i), individualId);
        if (!individualList.contains(individualId)) {
            individualList.add(individualId);
        }
    }

    String exportName = sModule + "_" + markerCount + "variants_" + individualList.size() + "individuals";
    zos.putNextEntry(new ZipEntry(exportName + ".vcf"));

    int avgObjSize = (Integer) mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats().get("avgObjSize");
    int nQueryChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;

    VariantContextWriter writer = null;
    try {
        List<String> distinctSequenceNames = new ArrayList<String>();

        String sequenceSeqCollName = MongoTemplateManager.getMongoCollectionName(Sequence.class);
        if (mongoTemplate.collectionExists(sequenceSeqCollName)) {
            DBCursor markerCursorCopy = markerCursor.copy();
            markerCursorCopy.batchSize(nQueryChunkSize);
            while (markerCursorCopy.hasNext()) {
                int nLoadedMarkerCountInLoop = 0;
                boolean fStartingNewChunk = true;
                while (markerCursorCopy.hasNext()
                        && (fStartingNewChunk || nLoadedMarkerCountInLoop % nQueryChunkSize != 0)) {
                    DBObject exportVariant = markerCursorCopy.next();
                    String chr = (String) ((DBObject) exportVariant
                            .get(VariantData.FIELDNAME_REFERENCE_POSITION))
                                    .get(ReferencePosition.FIELDNAME_SEQUENCE);
                    if (!distinctSequenceNames.contains(chr))
                        distinctSequenceNames.add(chr);
                }
            }
            markerCursorCopy.close();
        }

        Collections.sort(distinctSequenceNames, new AlphaNumericStringComparator());
        SAMSequenceDictionary dict = createSAMSequenceDictionary(sModule, distinctSequenceNames);
        writer = new CustomVCFWriter(null, zos, dict, false, false, true);
        //         VariantContextWriterBuilder vcwb = new VariantContextWriterBuilder();
        //         vcwb.unsetOption(Options.INDEX_ON_THE_FLY);
        //         vcwb.unsetOption(Options.DO_NOT_WRITE_GENOTYPES);
        //         vcwb.setOption(Options.USE_ASYNC_IOINDEX_ON_THE_FLY);
        //         vcwb.setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
        //         vcwb.setReferenceDictionary(dict);
        //         writer = vcwb.build();
        //         writer = new AsyncVariantContextWriter(writer, 3000);

        progress.moveToNextStep(); // done with dictionary
        DBCursor headerCursor = mongoTemplate
                .getCollection(MongoTemplateManager.getMongoCollectionName(DBVCFHeader.class))
                .find(new BasicDBObject("_id." + VcfHeaderId.FIELDNAME_PROJECT, projectId));
        Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
        boolean fWriteCommandLine = true, fWriteEngineHeaders = true; // default values

        while (headerCursor.hasNext()) {
            DBVCFHeader dbVcfHeader = DBVCFHeader.fromDBObject(headerCursor.next());
            headerLines.addAll(dbVcfHeader.getHeaderLines());

            // Add sequence header lines (not stored in our vcf header collection)
            BasicDBObject projection = new BasicDBObject(SequenceStats.FIELDNAME_SEQUENCE_LENGTH, true);
            int nSequenceIndex = 0;
            for (String sequenceName : distinctSequenceNames) {
                String sequenceInfoCollName = MongoTemplateManager.getMongoCollectionName(SequenceStats.class);
                boolean fCollectionExists = mongoTemplate.collectionExists(sequenceInfoCollName);
                if (fCollectionExists) {
                    DBObject record = mongoTemplate.getCollection(sequenceInfoCollName).findOne(
                            new Query(Criteria.where("_id").is(sequenceName)).getQueryObject(), projection);
                    if (record == null) {
                        LOG.warn("Sequence '" + sequenceName + "' not found in collection "
                                + sequenceInfoCollName);
                        continue;
                    }

                    Map<String, String> sequenceLineData = new LinkedHashMap<String, String>();
                    sequenceLineData.put("ID", (String) record.get("_id"));
                    sequenceLineData.put("length",
                            ((Number) record.get(SequenceStats.FIELDNAME_SEQUENCE_LENGTH)).toString());
                    headerLines.add(new VCFContigHeaderLine(sequenceLineData, nSequenceIndex++));
                }
            }
            fWriteCommandLine = headerCursor.size() == 1 && dbVcfHeader.getWriteCommandLine(); // wouldn't make sense to include command lines for several runs
            if (!dbVcfHeader.getWriteEngineHeaders())
                fWriteEngineHeaders = false;
        }
        headerCursor.close();

        VCFHeader header = new VCFHeader(headerLines, individualList);
        header.setWriteCommandLine(fWriteCommandLine);
        header.setWriteEngineHeaders(fWriteEngineHeaders);
        writer.writeHeader(header);

        short nProgress = 0, nPreviousProgress = 0;
        long nLoadedMarkerCount = 0;
        HashMap<SampleId, Comparable /*phID*/> phasingIDsBySample = new HashMap<SampleId, Comparable>();

        while (markerCursor.hasNext()) {
            if (progress.hasAborted())
                return;

            int nLoadedMarkerCountInLoop = 0;
            boolean fStartingNewChunk = true;
            markerCursor.batchSize(nQueryChunkSize);
            List<Comparable> currentMarkers = new ArrayList<Comparable>();
            while (markerCursor.hasNext()
                    && (fStartingNewChunk || nLoadedMarkerCountInLoop % nQueryChunkSize != 0)) {
                DBObject exportVariant = markerCursor.next();
                currentMarkers.add((Comparable) exportVariant.get("_id"));
                nLoadedMarkerCountInLoop++;
                fStartingNewChunk = false;
            }

            LinkedHashMap<VariantData, Collection<VariantRunData>> variantsAndRuns = MgdbDao.getSampleGenotypes(
                    mongoTemplate, sampleIDs, currentMarkers, true,
                    null /*new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_SEQUENCE).and(new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_START_SITE))*/); // query mongo db for matching genotypes
            for (VariantData variant : variantsAndRuns.keySet()) {
                VariantContext vc = variant.toVariantContext(variantsAndRuns.get(variant),
                        !ObjectId.isValid(variant.getId().toString()), sampleIDToIndividualIdMap,
                        phasingIDsBySample, nMinimumGenotypeQuality, nMinimumReadDepth, warningFileWriter,
                        markerSynonyms == null ? variant.getId() : markerSynonyms.get(variant.getId()));
                try {
                    writer.add(vc);
                } catch (Throwable t) {
                    Exception e = new Exception("Unable to convert to VariantContext: " + variant.getId(), t);
                    LOG.debug("error", e);
                    throw e;
                }

                if (nLoadedMarkerCountInLoop > currentMarkers.size())
                    LOG.error("Bug: writing variant number " + nLoadedMarkerCountInLoop + " (only "
                            + currentMarkers.size() + " variants expected)");
            }

            nLoadedMarkerCount += nLoadedMarkerCountInLoop;
            nProgress = (short) (nLoadedMarkerCount * 100 / markerCount);
            if (nProgress > nPreviousProgress) {
                progress.setCurrentStepProgress(nProgress);
                nPreviousProgress = nProgress;
            }
        }
        progress.setCurrentStepProgress((short) 100);

    } catch (Exception e) {
        LOG.error("Error exporting", e);
        progress.setError(e.getMessage());
        return;
    } finally {
        warningFileWriter.close();
        if (warningFile.length() > 0) {
            zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
            int nWarningCount = 0;
            BufferedReader in = new BufferedReader(new FileReader(warningFile));
            String sLine;
            while ((sLine = in.readLine()) != null) {
                zos.write((sLine + "\n").getBytes());
                nWarningCount++;
            }
            LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
            in.close();
        }
        warningFile.delete();
        if (writer != null)
            try {
                writer.close();
            } catch (Throwable ignored) {
            }
    }
}

From source file:fr.cirad.mgdb.exporting.markeroriented.HapMapExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, List<SampleId> sampleIDs,
        ProgressIndicator progress, DBCursor markerCursor, Map<Comparable, Comparable> markerSynonyms,
        int nMinimumGenotypeQuality, int nMinimumReadDepth, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);

    int markerCount = markerCursor.count();

    ZipOutputStream zos = new ZipOutputStream(outputStream);

    if (readyToExportFiles != null)
        for (String readyToExportFile : readyToExportFiles.keySet()) {
            zos.putNextEntry(new ZipEntry(readyToExportFile));
            InputStream inputStream = readyToExportFiles.get(readyToExportFile);
            byte[] dataBlock = new byte[1024];
            int count = inputStream.read(dataBlock, 0, 1024);
            while (count != -1) {
                zos.write(dataBlock, 0, count);
                count = inputStream.read(dataBlock, 0, 1024);
            }//from  ww w .j  a v a 2s. com
        }

    List<Individual> individuals = getIndividualsFromSamples(sModule, sampleIDs);
    ArrayList<String> individualList = new ArrayList<String>();
    for (int i = 0; i < sampleIDs.size(); i++) {
        Individual individual = individuals.get(i);
        if (!individualList.contains(individual.getId())) {
            individualList.add(individual.getId());
        }
    }

    String exportName = sModule + "_" + markerCount + "variants_" + individualList.size() + "individuals";
    zos.putNextEntry(new ZipEntry(exportName + ".hapmap"));
    String header = "rs#" + "\t" + "alleles" + "\t" + "chrom" + "\t" + "pos" + "\t" + "strand" + "\t"
            + "assembly#" + "\t" + "center" + "\t" + "protLSID" + "\t" + "assayLSID" + "\t" + "panelLSID" + "\t"
            + "QCcode";
    zos.write(header.getBytes());
    for (int i = 0; i < individualList.size(); i++) {
        zos.write(("\t" + individualList.get(i)).getBytes());
    }
    zos.write((LINE_SEPARATOR).getBytes());

    int avgObjSize = (Integer) mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats().get("avgObjSize");
    int nChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;
    short nProgress = 0, nPreviousProgress = 0;
    long nLoadedMarkerCount = 0;

    while (markerCursor == null || markerCursor.hasNext()) {
        int nLoadedMarkerCountInLoop = 0;
        Map<Comparable, String> markerChromosomalPositions = new LinkedHashMap<Comparable, String>();
        boolean fStartingNewChunk = true;
        markerCursor.batchSize(nChunkSize);
        while (markerCursor.hasNext() && (fStartingNewChunk || nLoadedMarkerCountInLoop % nChunkSize != 0)) {
            DBObject exportVariant = markerCursor.next();
            DBObject refPos = (DBObject) exportVariant.get(VariantData.FIELDNAME_REFERENCE_POSITION);
            markerChromosomalPositions.put((Comparable) exportVariant.get("_id"),
                    refPos.get(ReferencePosition.FIELDNAME_SEQUENCE) + ":"
                            + refPos.get(ReferencePosition.FIELDNAME_START_SITE));
            nLoadedMarkerCountInLoop++;
            fStartingNewChunk = false;
        }

        List<Comparable> currentMarkers = new ArrayList<Comparable>(markerChromosomalPositions.keySet());
        LinkedHashMap<VariantData, Collection<VariantRunData>> variantsAndRuns = MgdbDao.getSampleGenotypes(
                mongoTemplate, sampleIDs, currentMarkers, true,
                null /*new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_SEQUENCE).and(new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_START_SITE))*/); // query mongo db for matching genotypes
        for (VariantData variant : variantsAndRuns.keySet()) // read data and write results into temporary files (one per sample)
        {
            Comparable variantId = variant.getId();
            if (markerSynonyms != null) {
                Comparable syn = markerSynonyms.get(variantId);
                if (syn != null)
                    variantId = syn;
            }

            boolean fIsSNP = variant.getType().equals(Type.SNP.toString());
            byte[] missingGenotype = ("\t" + "NN").getBytes();

            String[] chromAndPos = markerChromosomalPositions.get(variant.getId()).split(":");
            zos.write(((variantId == null ? variant.getId() : variantId) + "\t"
                    + StringUtils.join(variant.getKnownAlleleList(), "/") + "\t" + chromAndPos[0] + "\t"
                    + Long.parseLong(chromAndPos[1]) + "\t" + "+").getBytes());
            for (int j = 0; j < 6; j++)
                zos.write(("\t" + "NA").getBytes());

            Map<String, Integer> gqValueForSampleId = new LinkedHashMap<String, Integer>();
            Map<String, Integer> dpValueForSampleId = new LinkedHashMap<String, Integer>();
            Map<String, List<String>> individualGenotypes = new LinkedHashMap<String, List<String>>();
            Collection<VariantRunData> runs = variantsAndRuns.get(variant);
            if (runs != null)
                for (VariantRunData run : runs)
                    for (Integer sampleIndex : run.getSampleGenotypes().keySet()) {
                        SampleGenotype sampleGenotype = run.getSampleGenotypes().get(sampleIndex);
                        String gtCode = run.getSampleGenotypes().get(sampleIndex).getCode();
                        String individualId = individuals
                                .get(sampleIDs.indexOf(new SampleId(run.getId().getProjectId(), sampleIndex)))
                                .getId();
                        List<String> storedIndividualGenotypes = individualGenotypes.get(individualId);
                        if (storedIndividualGenotypes == null) {
                            storedIndividualGenotypes = new ArrayList<String>();
                            individualGenotypes.put(individualId, storedIndividualGenotypes);
                        }
                        storedIndividualGenotypes.add(gtCode);
                        gqValueForSampleId.put(individualId,
                                (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_GQ));
                        dpValueForSampleId.put(individualId,
                                (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_DP));
                    }

            int writtenGenotypeCount = 0;
            for (String individualId : individualList /* we use this list because it has the proper ordering */) {
                int individualIndex = individualList.indexOf(individualId);
                while (writtenGenotypeCount < individualIndex - 1) {
                    zos.write(missingGenotype);
                    writtenGenotypeCount++;
                }

                List<String> genotypes = individualGenotypes.get(individualId);
                HashMap<Object, Integer> genotypeCounts = new HashMap<Object, Integer>(); // will help us to keep track of missing genotypes
                int highestGenotypeCount = 0;
                String mostFrequentGenotype = null;
                if (genotypes != null)
                    for (String genotype : genotypes) {
                        if (genotype.length() == 0)
                            continue; /* skip missing genotypes */

                        Integer gqValue = gqValueForSampleId.get(individualId);
                        if (gqValue != null && gqValue < nMinimumGenotypeQuality)
                            continue; /* skip this sample because its GQ is under the threshold */

                        Integer dpValue = dpValueForSampleId.get(individualId);
                        if (dpValue != null && dpValue < nMinimumReadDepth)
                            continue; /* skip this sample because its DP is under the threshold */

                        int gtCount = 1 + MgdbDao.getCountForKey(genotypeCounts, genotype);
                        if (gtCount > highestGenotypeCount) {
                            highestGenotypeCount = gtCount;
                            mostFrequentGenotype = genotype;
                        }
                        genotypeCounts.put(genotype, gtCount);
                    }

                byte[] exportedGT = mostFrequentGenotype == null ? missingGenotype
                        : ("\t" + StringUtils.join(variant.getAllelesFromGenotypeCode(mostFrequentGenotype),
                                fIsSNP ? "" : "/")).getBytes();
                zos.write(exportedGT);
                writtenGenotypeCount++;

                if (genotypeCounts.size() > 1)
                    warningFileWriter.write("- Dissimilar genotypes found for variant "
                            + (variantId == null ? variant.getId() : variantId) + ", individual " + individualId
                            + ". Exporting most frequent: " + new String(exportedGT) + "\n");
            }

            while (writtenGenotypeCount < individualList.size()) {
                zos.write(missingGenotype);
                writtenGenotypeCount++;
            }
            zos.write((LINE_SEPARATOR).getBytes());
        }

        if (progress.hasAborted())
            return;

        nLoadedMarkerCount += nLoadedMarkerCountInLoop;
        nProgress = (short) (nLoadedMarkerCount * 100 / markerCount);
        if (nProgress > nPreviousProgress) {
            //            if (nProgress%5 == 0)
            //               LOG.info("========================= exportData: " + nProgress + "% =========================" + (System.currentTimeMillis() - before)/1000 + "s");
            progress.setCurrentStepProgress(nProgress);
            nPreviousProgress = nProgress;
        }
    }

    warningFileWriter.close();
    if (warningFile.length() > 0) {
        zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
        int nWarningCount = 0;
        BufferedReader in = new BufferedReader(new FileReader(warningFile));
        String sLine;
        while ((sLine = in.readLine()) != null) {
            zos.write((sLine + "\n").getBytes());
            in.readLine();
            nWarningCount++;
        }
        LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
        in.close();
    }
    warningFile.delete();

    zos.close();
    progress.setCurrentStepProgress((short) 100);
}

From source file:fr.cirad.mgdb.exporting.markeroriented.GFFExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, List<SampleId> sampleIDs,
        ProgressIndicator progress, DBCursor markerCursor, Map<Comparable, Comparable> markerSynonyms,
        int nMinimumGenotypeQuality, int nMinimumReadDepth, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    ZipOutputStream zos = new ZipOutputStream(outputStream);

    if (readyToExportFiles != null)
        for (String readyToExportFile : readyToExportFiles.keySet()) {
            zos.putNextEntry(new ZipEntry(readyToExportFile));
            InputStream inputStream = readyToExportFiles.get(readyToExportFile);
            byte[] dataBlock = new byte[1024];
            int count = inputStream.read(dataBlock, 0, 1024);
            while (count != -1) {
                zos.write(dataBlock, 0, count);
                count = inputStream.read(dataBlock, 0, 1024);
            }//from  w w w  . j  a  va2 s .  c o  m
        }

    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);

    int markerCount = markerCursor.count();

    List<Individual> individuals = getIndividualsFromSamples(sModule, sampleIDs);
    ArrayList<String> individualList = new ArrayList<String>();
    for (int i = 0; i < sampleIDs.size(); i++) {
        Individual individual = individuals.get(i);
        if (!individualList.contains(individual.getId())) {
            individualList.add(individual.getId());
        }
    }

    String exportName = sModule + "_" + markerCount + "variants_" + individualList.size() + "individuals";
    zos.putNextEntry(new ZipEntry(exportName + ".gff3"));
    String header = "##gff-version 3" + LINE_SEPARATOR;
    zos.write(header.getBytes());

    TreeMap<String, String> typeToOntology = new TreeMap<String, String>();
    typeToOntology.put(Type.SNP.toString(), "SO:0000694");
    typeToOntology.put(Type.INDEL.toString(), "SO:1000032");
    typeToOntology.put(Type.MIXED.toString(), "SO:0001059");
    typeToOntology.put(Type.SYMBOLIC.toString(), "SO:0000109");
    typeToOntology.put(Type.MNP.toString(), "SO:0001059");

    int avgObjSize = (Integer) mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats().get("avgObjSize");
    int nChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;
    short nProgress = 0, nPreviousProgress = 0;
    long nLoadedMarkerCount = 0;

    while (markerCursor.hasNext()) {
        int nLoadedMarkerCountInLoop = 0;
        Map<Comparable, String> markerChromosomalPositions = new LinkedHashMap<Comparable, String>();
        boolean fStartingNewChunk = true;
        markerCursor.batchSize(nChunkSize);
        while (markerCursor.hasNext() && (fStartingNewChunk || nLoadedMarkerCountInLoop % nChunkSize != 0)) {
            DBObject exportVariant = markerCursor.next();
            DBObject refPos = (DBObject) exportVariant.get(VariantData.FIELDNAME_REFERENCE_POSITION);
            markerChromosomalPositions.put((Comparable) exportVariant.get("_id"),
                    refPos.get(ReferencePosition.FIELDNAME_SEQUENCE) + ":"
                            + refPos.get(ReferencePosition.FIELDNAME_START_SITE));
            nLoadedMarkerCountInLoop++;
            fStartingNewChunk = false;
        }

        List<Comparable> currentMarkers = new ArrayList<Comparable>(markerChromosomalPositions.keySet());
        LinkedHashMap<VariantData, Collection<VariantRunData>> variantsAndRuns = MgdbDao.getSampleGenotypes(
                mongoTemplate, sampleIDs, currentMarkers, true,
                null /*new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_SEQUENCE).and(new Sort(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ChromosomalPosition.FIELDNAME_START_SITE))*/); // query mongo db for matching genotypes
        for (VariantData variant : variantsAndRuns.keySet()) // read data and write results into temporary files (one per sample)
        {
            Comparable variantId = variant.getId();
            List<String> variantDataOrigin = new ArrayList<String>();

            Map<String, Integer> gqValueForSampleId = new LinkedHashMap<String, Integer>();
            Map<String, Integer> dpValueForSampleId = new LinkedHashMap<String, Integer>();
            Map<String, List<String>> individualGenotypes = new LinkedHashMap<String, List<String>>();
            List<String> chromAndPos = Helper.split(markerChromosomalPositions.get(variantId), ":");
            if (chromAndPos.size() == 0)
                LOG.warn("Chromosomal position not found for marker " + variantId);
            // LOG.debug(marker + "\t" + (chromAndPos.length == 0 ? "0" : chromAndPos[0]) + "\t" + 0 + "\t" + (chromAndPos.length == 0 ? 0l : Long.parseLong(chromAndPos[1])) + LINE_SEPARATOR);
            if (markerSynonyms != null) {
                Comparable syn = markerSynonyms.get(variantId);
                if (syn != null)
                    variantId = syn;
            }

            Collection<VariantRunData> runs = variantsAndRuns.get(variant);
            if (runs != null)
                for (VariantRunData run : runs)
                    for (Integer sampleIndex : run.getSampleGenotypes().keySet()) {
                        SampleGenotype sampleGenotype = run.getSampleGenotypes().get(sampleIndex);
                        String individualId = individuals
                                .get(sampleIDs.indexOf(new SampleId(run.getId().getProjectId(), sampleIndex)))
                                .getId();

                        Integer gq = null;
                        try {
                            gq = (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_GQ);
                        } catch (Exception ignored) {
                        }
                        if (gq != null && gq < nMinimumGenotypeQuality)
                            continue;

                        Integer dp = null;
                        try {
                            dp = (Integer) sampleGenotype.getAdditionalInfo().get(VariantData.GT_FIELD_DP);
                        } catch (Exception ignored) {
                        }
                        if (dp != null && dp < nMinimumReadDepth)
                            continue;

                        String gtCode = sampleGenotype.getCode();
                        List<String> storedIndividualGenotypes = individualGenotypes.get(individualId);
                        if (storedIndividualGenotypes == null) {
                            storedIndividualGenotypes = new ArrayList<String>();
                            individualGenotypes.put(individualId, storedIndividualGenotypes);
                        }
                        storedIndividualGenotypes.add(gtCode);
                    }

            zos.write((chromAndPos.get(0) + "\t" + StringUtils.join(variantDataOrigin, ";") /*source*/ + "\t"
                    + typeToOntology.get(variant.getType()) + "\t" + Long.parseLong(chromAndPos.get(1)) + "\t"
                    + Long.parseLong(chromAndPos.get(1)) + "\t" + "." + "\t" + "+" + "\t" + "." + "\t")
                            .getBytes());
            Comparable syn = markerSynonyms == null ? null : markerSynonyms.get(variant.getId());
            zos.write(("ID=" + variant.getId() + ";" + (syn != null ? "Name=" + syn + ";" : "") + "alleles="
                    + StringUtils.join(variant.getKnownAlleleList(), "/") + ";" + "refallele="
                    + variant.getKnownAlleleList().get(0) + ";").getBytes());

            for (int j = 0; j < individualList
                    .size(); j++ /* we use this list because it has the proper ordering*/) {

                NumberFormat nf = NumberFormat.getInstance(Locale.US);
                nf.setMaximumFractionDigits(4);
                HashMap<String, Integer> compt1 = new HashMap<String, Integer>();
                int highestGenotypeCount = 0;
                int sum = 0;

                String individualId = individualList.get(j);
                List<String> genotypes = individualGenotypes.get(individualId);
                HashMap<Object, Integer> genotypeCounts = new HashMap<Object, Integer>(); // will help us to keep track of missing genotypes

                String mostFrequentGenotype = null;
                if (genotypes != null)
                    for (String genotype : genotypes) {
                        if (genotype.length() == 0)
                            continue; /* skip missing genotypes */

                        int count = 0;
                        for (String t : variant.getAllelesFromGenotypeCode(genotype)) {
                            for (String t1 : variant.getKnownAlleleList()) {
                                if (t.equals(t1) && !(compt1.containsKey(t1))) {
                                    count++;
                                    compt1.put(t1, count);
                                } else if (t.equals(t1) && compt1.containsKey(t1)) {
                                    if (compt1.get(t1) != 0) {
                                        count++;
                                        compt1.put(t1, count);
                                    } else
                                        compt1.put(t1, count);
                                } else if (!(compt1.containsKey(t1))) {
                                    compt1.put(t1, 0);
                                }
                            }
                        }
                        for (int countValue : compt1.values()) {
                            sum += countValue;
                        }

                        int gtCount = 1 + MgdbDao.getCountForKey(genotypeCounts, genotype);
                        if (gtCount > highestGenotypeCount) {
                            highestGenotypeCount = gtCount;
                            mostFrequentGenotype = genotype;
                        }
                        genotypeCounts.put(genotype, gtCount);
                    }

                List<String> alleles = mostFrequentGenotype == null ? new ArrayList<String>()
                        : variant.getAllelesFromGenotypeCode(mostFrequentGenotype);

                if (alleles.size() != 0) {
                    zos.write(("acounts=" + individualId + ":").getBytes());

                    for (String knowAllelesCompt : compt1.keySet()) {
                        zos.write(
                                (knowAllelesCompt + " " + nf.format(compt1.get(knowAllelesCompt) / (float) sum)
                                        + " " + compt1.get(knowAllelesCompt) + " ").getBytes());
                    }
                    zos.write((alleles.size() + ";").getBytes());
                }
                if (genotypeCounts.size() > 1) {
                    Comparable sVariantId = markerSynonyms != null ? markerSynonyms.get(variant.getId())
                            : variant.getId();
                    warningFileWriter.write("- Dissimilar genotypes found for variant "
                            + (sVariantId == null ? variant.getId() : sVariantId) + ", individual "
                            + individualId + ". Exporting most frequent: " + StringUtils.join(alleles, ",")
                            + "\n");
                }
            }
            zos.write((LINE_SEPARATOR).getBytes());
        }

        if (progress.hasAborted())
            return;

        nLoadedMarkerCount += nLoadedMarkerCountInLoop;
        nProgress = (short) (nLoadedMarkerCount * 100 / markerCount);
        if (nProgress > nPreviousProgress) {
            //            if (nProgress%5 == 0)
            //               LOG.info("========================= exportData: " + nProgress + "% =========================" + (System.currentTimeMillis() - before)/1000 + "s");
            progress.setCurrentStepProgress(nProgress);
            nPreviousProgress = nProgress;
        }
    }

    warningFileWriter.close();
    if (warningFile.length() > 0) {
        zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
        int nWarningCount = 0;
        BufferedReader in = new BufferedReader(new FileReader(warningFile));
        String sLine;
        while ((sLine = in.readLine()) != null) {
            zos.write((sLine + "\n").getBytes());
            in.readLine();
            nWarningCount++;
        }
        LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
        in.close();
    }
    warningFile.delete();

    zos.close();
    progress.setCurrentStepProgress((short) 100);
}

From source file:fr.cirad.mgdb.exporting.individualoriented.DARwinExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, Collection<File> individualExportFiles,
        boolean fDeleteSampleExportFilesOnExit, ProgressIndicator progress, DBCursor markerCursor,
        Map<Comparable, Comparable> markerSynonyms, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    GenotypingProject aProject = mongoTemplate.findOne(
            new Query(Criteria.where(GenotypingProject.FIELDNAME_PLOIDY_LEVEL).exists(true)),
            GenotypingProject.class);
    if (aProject == null)
        LOG.warn("Unable to find a project containing ploidy level information! Assuming ploidy level is 2.");

    int ploidy = aProject == null ? 2 : aProject.getPloidyLevel();

    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);

    int markerCount = markerCursor.count();

    ZipOutputStream zos = new ZipOutputStream(outputStream);

    if (readyToExportFiles != null)
        for (String readyToExportFile : readyToExportFiles.keySet()) {
            zos.putNextEntry(new ZipEntry(readyToExportFile));
            InputStream inputStream = readyToExportFiles.get(readyToExportFile);
            byte[] dataBlock = new byte[1024];
            int count = inputStream.read(dataBlock, 0, 1024);
            while (count != -1) {
                zos.write(dataBlock, 0, count);
                count = inputStream.read(dataBlock, 0, 1024);
            }/*from   w w w  .  ja  va  2  s. com*/
        }

    String exportName = sModule + "_" + markerCount + "variants_" + individualExportFiles.size()
            + "individuals";

    StringBuffer donFileContents = new StringBuffer(
            "@DARwin 5.0 - DON -" + LINE_SEPARATOR + individualExportFiles.size() + "\t" + 1 + LINE_SEPARATOR
                    + "N" + "\t" + "individual" + LINE_SEPARATOR);

    int count = 0;
    String missingGenotype = "";
    for (int j = 0; j < ploidy; j++)
        missingGenotype += "\tN";

    zos.putNextEntry(new ZipEntry(exportName + ".var"));
    zos.write(("@DARwin 5.0 - ALLELIC - " + ploidy + LINE_SEPARATOR + individualExportFiles.size() + "\t"
            + markerCount * ploidy + LINE_SEPARATOR + "N").getBytes());

    DBCursor markerCursorCopy = markerCursor.copy(); // dunno how expensive this is, but seems safer than keeping all IDs in memory at any time

    short nProgress = 0, nPreviousProgress = 0;
    int avgObjSize = (Integer) mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats().get("avgObjSize");
    int nChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;
    markerCursorCopy.batchSize(nChunkSize);

    int nMarkerIndex = 0;
    while (markerCursorCopy.hasNext()) {
        DBObject exportVariant = markerCursorCopy.next();
        Comparable markerId = (Comparable) exportVariant.get("_id");

        if (markerSynonyms != null) {
            Comparable syn = markerSynonyms.get(markerId);
            if (syn != null)
                markerId = syn;
        }
        for (int j = 0; j < ploidy; j++)
            zos.write(("\t" + markerId).getBytes());
    }

    TreeMap<Integer, Comparable> problematicMarkerIndexToNameMap = new TreeMap<Integer, Comparable>();
    ArrayList<String> distinctAlleles = new ArrayList<String>(); // the index of each allele will be used as its code
    int i = 0;
    for (File f : individualExportFiles) {
        BufferedReader in = new BufferedReader(new FileReader(f));
        try {
            String individualId, line = in.readLine(); // read sample id

            if (line != null)
                individualId = line;
            else
                throw new Exception("Unable to read first line of temp export file " + f.getName());

            donFileContents.append(++count + "\t" + individualId + LINE_SEPARATOR);

            zos.write((LINE_SEPARATOR + count).getBytes());
            nMarkerIndex = 0;

            while ((line = in.readLine()) != null) {
                List<String> genotypes = MgdbDao.split(line, "|");
                HashMap<Object, Integer> genotypeCounts = new HashMap<Object, Integer>(); // will help us to keep track of missing genotypes
                int highestGenotypeCount = 0;
                String mostFrequentGenotype = null;
                for (String genotype : genotypes) {
                    if (genotype.length() == 0)
                        continue; /* skip missing genotypes */

                    int gtCount = 1 + MgdbDao.getCountForKey(genotypeCounts, genotype);
                    if (gtCount > highestGenotypeCount) {
                        highestGenotypeCount = gtCount;
                        mostFrequentGenotype = genotype;
                    }
                    genotypeCounts.put(genotype, gtCount);
                }

                if (genotypeCounts.size() > 1) {
                    warningFileWriter.write("- Dissimilar genotypes found for variant __" + nMarkerIndex
                            + "__, individual " + individualId + ". Exporting most frequent: "
                            + mostFrequentGenotype + "\n");
                    problematicMarkerIndexToNameMap.put(nMarkerIndex, "");
                }

                String codedGenotype = "";
                if (mostFrequentGenotype != null)
                    for (String allele : mostFrequentGenotype.split(" ")) {
                        if (!distinctAlleles.contains(allele))
                            distinctAlleles.add(allele);
                        codedGenotype += "\t" + distinctAlleles.indexOf(allele);
                    }
                else
                    codedGenotype = missingGenotype.replaceAll("N", "-1"); // missing data is coded as -1
                zos.write(codedGenotype.getBytes());

                nMarkerIndex++;
            }
        } catch (Exception e) {
            LOG.error("Error exporting data", e);
            progress.setError("Error exporting data: " + e.getClass().getSimpleName()
                    + (e.getMessage() != null ? " - " + e.getMessage() : ""));
            return;
        } finally {
            in.close();
        }

        if (progress.hasAborted())
            return;

        nProgress = (short) (++i * 100 / individualExportFiles.size());
        if (nProgress > nPreviousProgress) {
            //            LOG.debug("============= doDARwinExport (" + i + "): " + nProgress + "% =============");
            progress.setCurrentStepProgress(nProgress);
            nPreviousProgress = nProgress;
        }

        if (!f.delete()) {
            f.deleteOnExit();
            LOG.info("Unable to delete tmp export file " + f.getAbsolutePath());
        }
    }

    zos.putNextEntry(new ZipEntry(exportName + ".don"));
    zos.write(donFileContents.toString().getBytes());

    // now read variant names for those that induced warnings
    nMarkerIndex = 0;
    markerCursor.batchSize(nChunkSize);
    while (markerCursor.hasNext()) {
        DBObject exportVariant = markerCursor.next();
        if (problematicMarkerIndexToNameMap.containsKey(nMarkerIndex)) {
            Comparable markerId = (Comparable) exportVariant.get("_id");

            if (markerSynonyms != null) {
                Comparable syn = markerSynonyms.get(markerId);
                if (syn != null)
                    markerId = syn;
            }
            for (int j = 0; j < ploidy; j++)
                zos.write(("\t" + markerId).getBytes());

            problematicMarkerIndexToNameMap.put(nMarkerIndex, markerId);
        }
    }

    warningFileWriter.close();
    if (warningFile.length() > 0) {
        zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
        int nWarningCount = 0;
        BufferedReader in = new BufferedReader(new FileReader(warningFile));
        String sLine;
        while ((sLine = in.readLine()) != null) {
            for (Integer aMarkerIndex : problematicMarkerIndexToNameMap.keySet())
                sLine = sLine.replaceAll("__" + aMarkerIndex + "__",
                        problematicMarkerIndexToNameMap.get(aMarkerIndex).toString());
            zos.write((sLine + "\n").getBytes());
            in.readLine();
            nWarningCount++;
        }
        LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
        in.close();
    }
    warningFile.delete();

    zos.close();
    progress.setCurrentStepProgress((short) 100);
}

From source file:fr.cirad.mgdb.exporting.individualoriented.PLinkExportHandler.java

@Override
public void exportData(OutputStream outputStream, String sModule, Collection<File> individualExportFiles,
        boolean fDeleteSampleExportFilesOnExit, ProgressIndicator progress, DBCursor markerCursor,
        Map<Comparable, Comparable> markerSynonyms, Map<String, InputStream> readyToExportFiles)
        throws Exception {
    File warningFile = File.createTempFile("export_warnings_", "");
    FileWriter warningFileWriter = new FileWriter(warningFile);

    ZipOutputStream zos = new ZipOutputStream(outputStream);

    if (readyToExportFiles != null)
        for (String readyToExportFile : readyToExportFiles.keySet()) {
            zos.putNextEntry(new ZipEntry(readyToExportFile));
            InputStream inputStream = readyToExportFiles.get(readyToExportFile);
            byte[] dataBlock = new byte[1024];
            int count = inputStream.read(dataBlock, 0, 1024);
            while (count != -1) {
                zos.write(dataBlock, 0, count);
                count = inputStream.read(dataBlock, 0, 1024);
            }//from  w  ww  . j av  a  2 s.  c  o m
        }

    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    int markerCount = markerCursor.count();

    String exportName = sModule + "_" + markerCount + "variants_" + individualExportFiles.size()
            + "individuals";
    zos.putNextEntry(new ZipEntry(exportName + ".ped"));

    TreeMap<Integer, Comparable> problematicMarkerIndexToNameMap = new TreeMap<Integer, Comparable>();
    short nProgress = 0, nPreviousProgress = 0;
    int i = 0;
    for (File f : individualExportFiles) {
        BufferedReader in = new BufferedReader(new FileReader(f));
        try {
            String individualId, line = in.readLine(); // read sample id
            if (line != null) {
                individualId = line;
                String population = getIndividualPopulation(sModule, line);
                String individualInfo = (population == null ? "." : population) + " " + individualId;
                zos.write((individualInfo + " 0 0 0 " + getIndividualGenderCode(sModule, individualId))
                        .getBytes());
            } else
                throw new Exception("Unable to read first line of temp export file " + f.getName());

            int nMarkerIndex = 0;
            while ((line = in.readLine()) != null) {
                List<String> genotypes = MgdbDao.split(line, "|");
                HashMap<Object, Integer> genotypeCounts = new HashMap<Object, Integer>(); // will help us to keep track of missing genotypes
                int highestGenotypeCount = 0;
                String mostFrequentGenotype = null;
                for (String genotype : genotypes) {
                    if (genotype.length() == 0)
                        continue; /* skip missing genotypes */

                    int gtCount = 1 + MgdbDao.getCountForKey(genotypeCounts, genotype);
                    if (gtCount > highestGenotypeCount) {
                        highestGenotypeCount = gtCount;
                        mostFrequentGenotype = genotype;
                    }
                    genotypeCounts.put(genotype, gtCount);
                }

                if (genotypeCounts.size() > 1) {
                    warningFileWriter.write("- Dissimilar genotypes found for variant " + nMarkerIndex
                            + ", individual " + individualId + ". Exporting most frequent: "
                            + mostFrequentGenotype + "\n");
                    problematicMarkerIndexToNameMap.put(nMarkerIndex, "");
                }

                String[] alleles = mostFrequentGenotype == null ? new String[0]
                        : mostFrequentGenotype.split(" ");
                if (alleles.length > 2) {
                    warningFileWriter.write("- More than 2 alleles found for variant " + nMarkerIndex
                            + ", individual " + individualId + ". Exporting only the first 2 alleles.\n");
                    problematicMarkerIndexToNameMap.put(nMarkerIndex, "");
                }

                String all1 = alleles.length == 0 ? "0" : alleles[0];
                String all2 = alleles.length == 0 ? "0" : alleles[alleles.length == 1 ? 0 : 1];
                if (all1.length() != 1 || all2.length() != 1) {
                    warningFileWriter
                            .write("- SNP expected, but alleles are not coded on a single char for variant "
                                    + nMarkerIndex + ", individual " + individualId
                                    + ". Ignoring this genotype.\n");
                    problematicMarkerIndexToNameMap.put(nMarkerIndex, "");
                } else
                    zos.write((" " + all1 + " " + all2).getBytes());

                nMarkerIndex++;
            }
        } catch (Exception e) {
            LOG.error("Error exporting data", e);
            progress.setError("Error exporting data: " + e.getClass().getSimpleName()
                    + (e.getMessage() != null ? " - " + e.getMessage() : ""));
            return;
        } finally {
            in.close();
        }

        if (progress.hasAborted())
            return;

        nProgress = (short) (++i * 100 / individualExportFiles.size());
        if (nProgress > nPreviousProgress) {
            progress.setCurrentStepProgress(nProgress);
            nPreviousProgress = nProgress;
        }
        zos.write('\n');

        if (!f.delete()) {
            f.deleteOnExit();
            LOG.info("Unable to delete tmp export file " + f.getAbsolutePath());
        }
    }
    warningFileWriter.close();

    zos.putNextEntry(new ZipEntry(exportName + ".map"));

    int avgObjSize = (Integer) mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantRunData.class)).getStats().get("avgObjSize");
    int nChunkSize = nMaxChunkSizeInMb * 1024 * 1024 / avgObjSize;

    markerCursor.batchSize(nChunkSize);
    int nMarkerIndex = 0;
    while (markerCursor.hasNext()) {
        DBObject exportVariant = markerCursor.next();
        DBObject refPos = (DBObject) exportVariant.get(VariantData.FIELDNAME_REFERENCE_POSITION);
        Comparable markerId = (Comparable) exportVariant.get("_id");
        String chrom = (String) refPos.get(ReferencePosition.FIELDNAME_SEQUENCE);
        Long pos = ((Number) refPos.get(ReferencePosition.FIELDNAME_START_SITE)).longValue();

        if (chrom == null)
            LOG.warn("Chromosomal position not found for marker " + markerId);
        Comparable exportedId = markerSynonyms == null ? markerId : markerSynonyms.get(markerId);
        zos.write(((chrom == null ? "0" : chrom) + " " + exportedId + " " + 0 + " " + (pos == null ? 0 : pos)
                + LINE_SEPARATOR).getBytes());

        if (problematicMarkerIndexToNameMap.containsKey(nMarkerIndex)) { // we are going to need this marker's name for the warning file
            Comparable variantName = markerId;
            if (markerSynonyms != null) {
                Comparable syn = markerSynonyms.get(markerId);
                if (syn != null)
                    variantName = syn;
            }
            problematicMarkerIndexToNameMap.put(nMarkerIndex, variantName);
        }
        nMarkerIndex++;
    }

    if (warningFile.length() > 0) {
        zos.putNextEntry(new ZipEntry(exportName + "-REMARKS.txt"));
        int nWarningCount = 0;
        BufferedReader in = new BufferedReader(new FileReader(warningFile));
        String sLine;
        while ((sLine = in.readLine()) != null) {
            for (Integer aMarkerIndex : problematicMarkerIndexToNameMap.keySet())
                sLine = sLine.replaceAll("__" + aMarkerIndex + "__",
                        problematicMarkerIndexToNameMap.get(aMarkerIndex).toString());
            zos.write((sLine + "\n").getBytes());
            in.readLine();
            nWarningCount++;
        }
        LOG.info("Number of Warnings for export (" + exportName + "): " + nWarningCount);
        in.close();
    }
    warningFile.delete();

    zos.close();
    progress.setCurrentStepProgress((short) 100);
}

From source file:org.opentestsystem.delivery.testadmin.scheduling.DbDumpImporter.java

public static void loadDefaultBsonDumps(final MongoTemplate mongoTemplate) {

    InputStream is = null;/*from   www  . jav  a 2  s  .  com*/
    byte[] buf = new byte[32768];
    byte[] lengthBuf = new byte[4];
    int len;
    int readLen = 0;

    for (String collection : DB_DUMP_COLLECTIONS) {

        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        is = DbDumpImporter.class.getResourceAsStream("/db-dump/" + collection + ".bson");

        List<DBObject> bsonList = new ArrayList<DBObject>();

        try {

            while ((len = is.read(lengthBuf, 0, LENGTH_BYTES)) > 0) {
                ArrayUtils.reverse(lengthBuf);

                readLen = new BigInteger(lengthBuf).intValue();

                ArrayUtils.reverse(lengthBuf);

                System.arraycopy(lengthBuf, 0, buf, 0, 4);

                len = is.read(buf, 4, readLen - LENGTH_BYTES);

                if (len > 0) {
                    bsonList.add(new BasicDBObject(BSON.decode(buf).toMap()));
                }

                Arrays.fill(buf, (byte) 0);
            }

        } catch (Exception e) {
            e.printStackTrace();
        }

        DBCollection dbCollection = mongoTemplate.getCollection(collection);
        dbCollection.insert(bsonList);

    }

    if (is != null) {
        try {
            is.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}