Example usage for com.google.common.base Stopwatch start

List of usage examples for com.google.common.base Stopwatch start

Introduction

This page lists usage examples for the com.google.common.base.Stopwatch start() method.

Prototype

public Stopwatch start() 

Documentation

Starts the stopwatch.
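
Before the project-level excerpts below, a minimal, self-contained sketch of start() together with the related lifecycle calls. The class name and the sleep are illustrative only; the Stopwatch calls themselves are standard Guava API.

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class StopwatchStartExample {

    public static void main(String[] args) throws InterruptedException {
        // createUnstarted() returns a stopped stopwatch; start() begins measuring.
        // start() throws IllegalStateException if the stopwatch is already running.
        Stopwatch stopwatch = Stopwatch.createUnstarted();
        stopwatch.start();

        Thread.sleep(100); // placeholder for the work being timed

        stopwatch.stop();
        System.out.println("Elapsed: " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
        // toString() also produces a human-readable duration, e.g. "101.3 ms"
        System.out.println("Elapsed: " + stopwatch);
    }
}

Stopwatch.createStarted() is a shorthand used in several of the examples below when the timing should begin immediately.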

Usage

From source file:org.opencb.cellbase.app.transform.VariationParser.java

@Override
public void parse() throws IOException, InterruptedException, SQLException, ClassNotFoundException {

    if (!Files.exists(variationDirectoryPath) || !Files.isDirectory(variationDirectoryPath)
            || !Files.isReadable(variationDirectoryPath)) {
        throw new IOException(
                "Variation directory whether does not exist, is not a directory or cannot be read");
    }
    if (!existsZippedOrUnzippedFile(VARIATION_FILENAME)
            || isEmpty(variationDirectoryPath.resolve(VARIATION_FILENAME).toString())) {
        throw new IOException("variation.txt.gz whether does not exist, is not a directory or cannot be read");
    }

    Variation variation;

    // To speed up calculation a SQLite database is created with the IDs and file offsets,
    // file must be uncompressed for doing this.
    gunzipVariationInputFiles();

    // add idVariation to transcript_variation file
    preprocessInputFiles();

    // Open variation file, this file never gets uncompressed. It's read from gzip file
    BufferedReader bufferedReaderVariation = getBufferedReader(PREPROCESSED_VARIATION_FILENAME);

    // create buffered readers for all other input files
    createVariationFilesBufferedReaders();

    Map<String, String> seqRegionMap = VariationUtils.parseSeqRegionToMap(variationDirectoryPath);
    Map<String, String> sourceMap = VariationUtils.parseSourceToMap(variationDirectoryPath);

    initializeVariationRelatedArrays();
    Stopwatch globalStartwatch = Stopwatch.createStarted();
    Stopwatch batchWatch = Stopwatch.createStarted();
    logger.info("Parsing variation file " + variationDirectoryPath.resolve(PREPROCESSED_VARIATION_FILENAME)
            + " ...");
    long countprocess = 0;
    String line;
    while ((line = bufferedReaderVariation.readLine()) != null) {
        String[] variationFields = line.split("\t");

        int variationId = Integer.parseInt(variationFields[0]);

        List<String[]> resultVariationFeature = getVariationRelatedFields(VARIATION_FEATURE_FILE_ID,
                variationId);
        if (resultVariationFeature != null && resultVariationFeature.size() > 0) {
            String[] variationFeatureFields = resultVariationFeature.get(0);

            List<TranscriptVariation> transcriptVariation = getTranscriptVariations(variationId,
                    variationFeatureFields[0]);
            List<Xref> xrefs = getXrefs(sourceMap, variationId);

            try {
                // Preparing the variation alleles
                String[] allelesArray = getAllelesArray(variationFeatureFields);

                // For code sanity save chromosome, start, end and id
                String chromosome = seqRegionMap.get(variationFeatureFields[1]);

                if (!chromosome.contains("PATCH") && !chromosome.contains("HSCHR")
                        && !chromosome.contains("contig")) {
                    int start = (variationFeatureFields != null) ? Integer.valueOf(variationFeatureFields[2])
                            : 0;
                    int end = (variationFeatureFields != null) ? Integer.valueOf(variationFeatureFields[3]) : 0;
                    String id = (variationFields[2] != null && !variationFields[2].equals("\\N"))
                            ? variationFields[2]
                            : "";
                    String reference = (allelesArray[0] != null && !allelesArray[0].equals("\\N"))
                            ? allelesArray[0]
                            : "";
                    String alternate = (allelesArray[1] != null && !allelesArray[1].equals("\\N"))
                            ? allelesArray[1]
                            : "";

                    // Preparing frequencies
                    //List<PopulationFrequency> populationFrequencies = getPopulationFrequencies(variationId, allelesArray);
                    List<PopulationFrequency> populationFrequencies = getPopulationFrequencies(chromosome,
                            start, end, id, reference, alternate);

                    // TODO: check that variationFeatureFields is always different to null and intergenic-variant is never used
                    //List<String> consequenceTypes = (variationFeatureFields != null) ? Arrays.asList(variationFeatureFields[12].split(",")) : Arrays.asList("intergenic_variant");
                    List<String> consequenceTypes = Arrays.asList(variationFeatureFields[12].split(","));
                    String displayConsequenceType = getDisplayConsequenceType(consequenceTypes);

                    // we have all the necessary to construct the 'variation' object
                    variation = buildVariation(variationFields, variationFeatureFields, chromosome, start, end,
                            id, reference, alternate, transcriptVariation, xrefs, populationFrequencies,
                            allelesArray, consequenceTypes, displayConsequenceType);
                    fileSerializer.serialize(variation, getOutputFileName(chromosome));
                }

                if (++countprocess % 100000 == 0 && countprocess != 0) {
                    logger.info("Processed variations: " + countprocess);
                    logger.debug("Elapsed time processing batch: " + batchWatch);
                    batchWatch.reset();
                    batchWatch.start();
                }

            } catch (Exception e) {
                e.printStackTrace();
                logger.error("Error parsing variation: " + e.getMessage());
                logger.error("Last line processed: " + line);
                break;
            }
        }
        // TODO: just for testing, remove
        //if (countprocess % 100000 == 0) {
        //    break;
        //}
    }

    logger.info("Variation parsing finished");
    logger.info("Variants processed: " + countprocess);
    logger.debug("Elapsed time parsing: " + globalStartwatch);

    gzipVariationFiles(variationDirectoryPath);

    try {
        bufferedReaderVariation.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
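
The parser above measures each batch of 100,000 variations by resetting and restarting batchWatch inside the loop. A reduced sketch of that pattern, assuming a hypothetical list of input lines and placeholder per-item work:

import com.google.common.base.Stopwatch;
import java.util.List;

public class BatchTimingSketch {

    // items stands in for the parser's input lines; the loop body is a placeholder
    static void parseAll(List<String> items) {
        Stopwatch batchWatch = Stopwatch.createStarted();
        long count = 0;
        for (String item : items) {
            // ... per-item work would go here ...
            if (++count % 100_000 == 0) {
                System.out.println("Processed " + count + ", batch took " + batchWatch);
                batchWatch.reset().start(); // reset() zeroes the elapsed time and returns the stopwatch, so start() can be chained
            }
        }
    }
}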

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file

        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {

        Long term = Long.parseLong(termStr);

        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term

        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows();//term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples,
                        TableUtils.getBigQueryEncodedTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
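
This reasoning task keeps two unstarted stopwatches: stopwatch1 accumulates the time spent inferring and serializing triples across many iterations, while stopwatch2 runs only while empty inference cycles continue, guarded by isRunning() so start() is never called on a running stopwatch. A condensed sketch of that accumulation pattern, with a placeholder loop and work condition:

import com.google.common.base.Stopwatch;

public class AccumulatingTimerSketch {

    static void run(int iterations) {
        // repeated start()/stop() pairs accumulate; the elapsed time is the sum of all timed segments
        Stopwatch workTimer = Stopwatch.createUnstarted();
        Stopwatch idleTimer = Stopwatch.createUnstarted();

        for (int i = 0; i < iterations; i++) {
            boolean hasWork = (i % 2 == 0); // placeholder for "query returned rows"
            if (hasWork) {
                workTimer.start();
                // ... timed work would go here ...
                workTimer.stop();
                idleTimer.reset(); // productive cycle: clear the idle timer
            } else if (!idleTimer.isRunning()) {
                idleTimer.start(); // keep running across consecutive idle cycles
            }
        }
        System.out.println("Time doing work: " + workTimer);
        System.out.println("Time in idle cycles: " + idleTimer);
    }
}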

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<String> generator = new QueryGenerator<String>(schemaTerms, null);

    // timestamp loop
    do {

        Set<String> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term

        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows();//term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file

        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {

        Long term = Long.parseLong(termStr);

        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    this.ddLimit = Config.getIntegerProperty(Constants.REASON_DATA_DIRECT_DOWNLOAD_LIMIT, 1_200_000);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term

        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query, new BigDataTable(this.table));
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename(), this.bucket, null, this.ddLimit);
                queryResult.setStats(stats);

            } catch (IOException ioe) {

                log.error("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                throw ioe;
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows();//term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples,
                        TableUtils.getBigQueryEncodedTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:org.opencb.cellbase.app.transform.variation.VariationParser.java

@Override
public void parse() throws IOException, InterruptedException, SQLException, ClassNotFoundException {

    if (!Files.exists(variationDirectoryPath) || !Files.isDirectory(variationDirectoryPath)
            || !Files.isReadable(variationDirectoryPath)) {
        throw new IOException(
                "Variation directory whether does not exist, is not a directory or cannot be read");
    }
    if (!variationFile.existsZippedOrUnzippedFile() || variationFile.isEmpty()) {
        throw new IOException("variation.txt.gz whether does not exist, is not a directory or cannot be read");
    }

    // add idVariation to transcript_variation file
    preprocessInputFiles();

    // Open variation file, this file never gets uncompressed. It's read from gzip file
    BufferedReader bufferedReaderVariation = variationFile.getBufferedReader();

    // create buffered readers for all other input files
    createVariationFilesReaders();

    Map<String, String> seqRegionMap = VariationUtils.parseSeqRegionToMap(variationDirectoryPath);
    Map<String, String> sourceMap = VariationUtils.parseSourceToMap(variationDirectoryPath);

    Stopwatch globalStartwatch = Stopwatch.createStarted();
    Stopwatch batchWatch = Stopwatch.createStarted();
    logger.info("Parsing variation file {} ...",
            variationDirectoryPath.resolve(PREPROCESSED_VARIATION_FILENAME));
    long countprocess = 0, incorrectEndVariants = 0, incorrectAllelesVariants = 0;

    String line;
    while ((line = bufferedReaderVariation.readLine()) != null) {
        String[] variationFields = line.split("\t");

        int variationId = Integer.parseInt(variationFields[0]);

        List<String[]> resultVariationFeature = variationFeatureFile.getVariationRelatedLines(variationId);
        if (resultVariationFeature != null && resultVariationFeature.size() > 0) {
            String[] variationFeatureFields = resultVariationFeature.get(0);

            List<TranscriptVariation> transcriptVariation = getTranscriptVariations(variationId,
                    variationFeatureFields[0]);
            List<Xref> xrefs = getXrefs(sourceMap, variationId);

            try {
                // Preparing the variation alleles
                String[] allelesArray = getAllelesArray(variationFeatureFields);
                if (allelesArray == null) {
                    logger.debug("Incorrect allele string: {}", variationFeatureFields[6]);
                    incorrectAllelesVariants++;
                } else {
                    String chromosome = seqRegionMap.get(variationFeatureFields[1]);

                    if (!chromosome.contains("PATCH") && !chromosome.contains("HSCHR")
                            && !chromosome.contains("contig")) {
                        int start = Integer.valueOf(variationFeatureFields[2]);
                        int end = Integer.valueOf(variationFeatureFields[3]);
                        String id = (variationFields[2] != null && !variationFields[2].equals("\\N"))
                                ? variationFields[2]
                                : "";
                        String reference = (allelesArray[0] != null && !allelesArray[0].equals("\\N"))
                                ? allelesArray[0]
                                : "";
                        List<String> alternates = getAlternates(allelesArray);

                        List<String> ids = new LinkedList<>();
                        ids.add(id);

                        List<String> hgvs = getHgvs(transcriptVariation);
                        Map<String, AdditionalAttribute> additionalAttributes = getAdditionalAttributes(
                                variationFields, variationFeatureFields);

                        List<ConsequenceType> conseqTypes = getConsequenceTypes(transcriptVariation);
                        String displayConsequenceTypes = getDisplayConsequenceType(variationFeatureFields);
                        String strand = variationFeatureFields[4];
                        String ancestralAllele = (variationFields[4] != null
                                && !variationFields[4].equals("\\N")) ? variationFields[4] : "";
                        String minorAllele = (variationFeatureFields[16] != null
                                && !variationFeatureFields[16].equals("\\N")) ? variationFeatureFields[16] : "";
                        Float minorAlleleFreq = (variationFeatureFields[17] != null
                                && !variationFeatureFields[17].equals("\\N"))
                                        ? Float.parseFloat(variationFeatureFields[17])
                                        : null;

                        // create a variation object for each alternative
                        for (String alternate : alternates) {
                            VariantType type = getVariantType(reference, alternate);
                            if (type == null) {
                                logger.warn("Unrecognized variant type (won't be parsed): {}:{}-{} {}/{}",
                                        chromosome, start, end, reference, alternate);
                            } else if (incorrectStartAndEnd(start, end, reference)) {
                                logger.debug("Incorrect variant start-end pair:  {}:{}-{} {}/{}", chromosome,
                                        start, end, reference, alternate);
                                incorrectEndVariants++;
                            } else {
                                // build and serialize variant
                                Variant variation = buildVariant(chromosome, start, end, reference, alternate,
                                        type, ids, hgvs, additionalAttributes, displayConsequenceTypes,
                                        conseqTypes, id, xrefs, strand, ancestralAllele, minorAllele,
                                        minorAlleleFreq);
                                fileSerializer.serialize(variation, getOutputFileName(chromosome));
                            }
                            countprocess++;
                        }
                    }
                }

                if (countprocess % 100000 == 0 && countprocess != 0) {
                    logger.info("Processed variations: {}", countprocess);
                    logger.debug("Elapsed time processing batch: {}", batchWatch);
                    batchWatch.reset();
                    batchWatch.start();
                }

            } catch (Exception e) {
                e.printStackTrace();
                logger.error("Error parsing variation: {}", e.getMessage());
                logger.error("Last line processed: {}", line);
                break;
            }
        }
        //            // TODO: just for testing, remove
        //            if (countprocess % 1000000 == 0) {
        //                break;
        //            }
    }

    serializer.close();
    logger.info("Variation parsing finished");
    logger.info("Variants processed: {}", countprocess);
    logger.info("Variants not parsed due to incorrect start-end: {}", incorrectEndVariants);
    logger.info("Variants not parsed due to incorrect alleles: {}", incorrectAllelesVariants);
    logger.debug("Elapsed time parsing: {}", globalStartwatch);

    gzipVariationFiles(variationDirectoryPath);

    try {
        bufferedReaderVariation.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.thinkbiganalytics.feedmgr.service.feed.DefaultFeedManagerFeedService.java

/**
 * Create/Update a Feed in NiFi. Save the metadata to Kylo meta store.
 *
 * @param feedMetadata the feed metadata
 * @return an object indicating if the feed creation was successful or not
 */
private NifiFeed createAndSaveFeed(FeedMetadata feedMetadata) {
    Stopwatch stopwatch = Stopwatch.createStarted();
    NifiFeed feed = null;
    if (StringUtils.isBlank(feedMetadata.getId())) {
        feedMetadata.setIsNew(true);

        //If the feed is New we need to ensure the user has CREATE_FEED entity permission
        if (accessController.isEntityAccessControlled()) {
            metadataAccess.read(() -> {
                //ensure the user has rights to create feeds under the category
                Category domainCategory = categoryProvider
                        .findById(categoryProvider.resolveId(feedMetadata.getCategory().getId()));
                if (domainCategory == null) {
                    //throw exception
                    throw new MetadataRepositoryException(
                            "Unable to find the category " + feedMetadata.getCategory().getSystemName());
                }
                domainCategory.getAllowedActions().checkPermission(CategoryAccessControl.CREATE_FEED);

                //ensure the user has rights to create feeds using the template
                FeedManagerTemplate domainTemplate = templateProvider
                        .findById(templateProvider.resolveId(feedMetadata.getTemplateId()));
                if (domainTemplate == null) {
                    throw new MetadataRepositoryException(
                            "Unable to find the template " + feedMetadata.getTemplateId());
                }
                //  domainTemplate.getAllowedActions().checkPermission(TemplateAccessControl.CREATE_FEED);
            });
        }

    } else if (accessController.isEntityAccessControlled()) {
        metadataAccess.read(() -> {
            //perform explict entity access check here as we dont want to modify the NiFi flow unless user has access to edit the feed
            Feed.ID domainId = feedProvider.resolveId(feedMetadata.getId());
            Feed domainFeed = feedProvider.findById(domainId);
            if (domainFeed != null) {
                domainFeed.getAllowedActions().checkPermission(FeedAccessControl.EDIT_DETAILS);
            } else {
                throw new NotFoundException("Feed not found for id " + feedMetadata.getId());
            }
        });
    }

    //replace expressions with values
    if (feedMetadata.getTable() != null) {
        feedMetadata.getTable().updateMetadataFieldValues();
    }

    if (feedMetadata.getProperties() == null) {
        feedMetadata.setProperties(new ArrayList<NifiProperty>());
    }

    //store ref to the originalFeedProperties before resolving and merging with the template
    List<NifiProperty> orignialFeedProperties = feedMetadata.getProperties();

    //get all the properties for the metadata
    RegisteredTemplate registeredTemplate = registeredTemplateService
            .findRegisteredTemplate(new RegisteredTemplateRequest.Builder()
                    .templateId(feedMetadata.getTemplateId()).templateName(feedMetadata.getTemplateName())
                    .isFeedEdit(true).includeSensitiveProperties(true).build());

    //update the template properties with the feedMetadata properties
    List<NifiProperty> matchedProperties = NifiPropertyUtil.matchAndSetPropertyByProcessorName(
            registeredTemplate.getProperties(), feedMetadata.getProperties(),
            NifiPropertyUtil.PROPERTY_MATCH_AND_UPDATE_MODE.UPDATE_ALL_PROPERTIES);

    feedMetadata.setProperties(registeredTemplate.getProperties());
    feedMetadata.setRegisteredTemplate(registeredTemplate);

    //resolve any ${metadata.} properties
    List<NifiProperty> resolvedProperties = propertyExpressionResolver.resolvePropertyExpressions(feedMetadata);

    /*
    //store all input related properties as well
    List<NifiProperty> inputProperties = NifiPropertyUtil
    .findInputProperties(registeredTemplate.getProperties());
            
    ///store only those matched and resolved in the final metadata store
    Set<NifiProperty> updatedProperties = new HashSet<>();
    //first get all those selected properties where the value differs from the template value
            
    List<NifiProperty> modifiedProperties = registeredTemplate.findModifiedDefaultProperties();
    if (modifiedProperties != null) {
     propertyExpressionResolver.resolvePropertyExpressions(modifiedProperties,feedMetadata);
    updatedProperties.addAll(modifiedProperties);
    }
    updatedProperties.addAll(matchedProperties);
    updatedProperties.addAll(resolvedProperties);
    updatedProperties.addAll(inputProperties);
    feedMetadata.setProperties(new ArrayList<NifiProperty>(updatedProperties));
            
    */

    //decrypt the metadata
    feedModelTransform.decryptSensitivePropertyValues(feedMetadata);

    FeedMetadata.STATE state = FeedMetadata.STATE.NEW;
    try {
        state = FeedMetadata.STATE.valueOf(feedMetadata.getState());
    } catch (Exception e) {
        //if the string isnt valid, disregard as it will end up disabling the feed.
    }

    boolean enabled = (FeedMetadata.STATE.NEW.equals(state) && feedMetadata.isActive())
            || FeedMetadata.STATE.ENABLED.equals(state);

    // flag to indicate to enable the feed later
    //if this is the first time for this feed and it is set to be enabled, mark it to be enabled after we commit to the JCR store
    boolean enableLater = false;
    if (enabled && feedMetadata.isNew()) {
        enableLater = true;
        enabled = false;
        feedMetadata.setState(FeedMetadata.STATE.DISABLED.name());
    }

    CreateFeedBuilder feedBuilder = CreateFeedBuilder
            .newFeed(nifiRestClient, nifiFlowCache, feedMetadata, registeredTemplate.getNifiTemplateId(),
                    propertyExpressionResolver, propertyDescriptorTransform, niFiObjectCache)
            .enabled(enabled).removeInactiveVersionedProcessGroup(removeInactiveNifiVersionedFeedFlows)
            .autoAlign(nifiAutoFeedsAlignAfterSave).withNiFiTemplateCache(niFiTemplateCache);

    if (registeredTemplate.isReusableTemplate()) {
        feedBuilder.setReusableTemplate(true);
        feedMetadata.setIsReusableFeed(true);
    } else {
        feedBuilder.inputProcessorType(feedMetadata.getInputProcessorType())
                .feedSchedule(feedMetadata.getSchedule()).properties(feedMetadata.getProperties());
        if (registeredTemplate.usesReusableTemplate()) {
            for (ReusableTemplateConnectionInfo connection : registeredTemplate
                    .getReusableTemplateConnections()) {
                feedBuilder.addInputOutputPort(new InputOutputPort(
                        connection.getReusableTemplateInputPortName(), connection.getFeedOutputPortName()));
            }
        }
    }
    stopwatch.stop();
    log.debug("Time to prepare data for saving feed in NiFi: {} ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    stopwatch.reset();
    stopwatch.start();
    NifiProcessGroup entity = feedBuilder.build();

    stopwatch.stop();
    log.debug("Time to save feed in NiFi: {} ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    stopwatch.reset();

    feed = new NifiFeed(feedMetadata, entity);

    //set the original feedProperties back to the feed
    feedMetadata.setProperties(orignialFeedProperties);
    //encrypt the metadata properties
    feedModelTransform.encryptSensitivePropertyValues(feedMetadata);

    if (entity.isSuccess()) {
        feedMetadata.setNifiProcessGroupId(entity.getProcessGroupEntity().getId());

        try {
            stopwatch.start();
            saveFeed(feedMetadata);
            feed.setEnableAfterSave(enableLater);
            feed.setSuccess(true);
            stopwatch.stop();
            log.debug("Time to saveFeed in Kylo: {} ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
            stopwatch.reset();
            stopwatch.start();
            feedBuilder.checkAndRemoveVersionedProcessGroup();

        } catch (Exception e) {
            feed.setSuccess(false);
            feed.addErrorMessage(e);
        }

    } else {
        feed.setSuccess(false);
    }
    if (!feed.isSuccess()) {
        if (!entity.isRolledBack()) {
            try {
                feedBuilder.rollback();
            } catch (FeedRollbackException rollbackException) {
                log.error("Error rolling back feed {}. {} ", feedMetadata.getCategoryAndFeedName(),
                        rollbackException.getMessage());
                feed.addErrorMessage("Error occurred in rolling back the Feed.");
            }
            entity.setRolledBack(true);
        }
    }
    return feed;
}
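
createAndSaveFeed reuses a single stopwatch to time successive phases (preparing the feed data, building the NiFi flow, saving in Kylo) by stopping, logging, resetting and restarting it. A minimal sketch of that phase-timing pattern; the phase names and print statements are illustrative only:

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class PhaseTimingSketch {

    public static void main(String[] args) {
        Stopwatch stopwatch = Stopwatch.createStarted();

        // ... phase 1: prepare data ...
        stopwatch.stop();
        System.out.println("Prepare: " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");

        stopwatch.reset();
        stopwatch.start();
        // ... phase 2: build ...
        stopwatch.stop();
        System.out.println("Build: " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
    }
}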