Example usage for com.google.common.base Stopwatch createUnstarted

List of usage examples for com.google.common.base Stopwatch createUnstarted

Introduction

On this page you can find example usage for com.google.common.base Stopwatch createUnstarted.

Prototype

@CheckReturnValue
public static Stopwatch createUnstarted() 

Document

Creates (but does not start) a new stopwatch, using System.nanoTime() as its time source.
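
For orientation, a minimal, self-contained sketch of the typical lifecycle (create unstarted, start, stop, read the elapsed time, reset) is shown below; the class name StopwatchDemo and the simulated workload are illustrative only and not taken from the examples on this page.

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class StopwatchDemo {
    public static void main(String[] args) throws InterruptedException {
        // Create the stopwatch without starting it
        Stopwatch watch = Stopwatch.createUnstarted();

        watch.start();              // begin timing
        Thread.sleep(50);           // simulate some work
        watch.stop();               // stop timing; the elapsed time is retained

        System.out.println("Elapsed: " + watch.elapsed(TimeUnit.MILLISECONDS) + " ms");

        watch.reset();              // clear the elapsed time and return to the unstarted state
    }
}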

Usage

From source file:es.usc.citius.composit.cli.command.CompositionCommand.java

private void benchmark(ComposIT<Concept, Boolean> composit, WSCTest.Dataset dataset, int cycles) {
    // Compute benchmark
    String bestSample = null;
    Stopwatch watch = Stopwatch.createUnstarted();
    long minMS = Long.MAX_VALUE;
    for (int i = 0; i < cycles; i++) {
        System.out.println("[ComposIT Search] Starting benchmark cycle " + (i + 1));
        watch.start();
        composit.search(dataset.getRequest());
        long ms = watch.stop().elapsed(TimeUnit.MILLISECONDS);
        if (ms < minMS) {
            minMS = ms;
        }
        watch.reset();

        if (cli.isMetrics()) {
            cli.println(" > Metrics: ");
            cli.println(" METRICS NOT IMPLEMENTED");
        }
    }
    System.out.println(
            "[Benchmark Result] " + cycles + "-cycle benchmark completed. Best time: " + minMS + " ms.");
    if (cli.isMetrics() && bestSample != null) {
        cli.println("Best sample: " + bestSample);
    }
}

From source file:org.opendaylight.protocol.bgp.rib.impl.BGPSessionStats.java

public BGPSessionStats(final Open remoteOpen, final int holdTimerValue, final int keepAlive,
        final Channel channel, final Optional<BGPSessionPreferences> localPreferences,
        final Collection<BgpTableType> tableTypes) {
    this.sessionStopwatch = Stopwatch.createUnstarted();
    this.stats = new BgpSessionState();
    this.stats.setHoldtimeCurrent(holdTimerValue);
    this.stats.setKeepaliveCurrent(keepAlive);
    this.stats.setPeerPreferences(setPeerPref(remoteOpen, channel, tableTypes));
    this.stats.setSpeakerPreferences(setSpeakerPref(channel, localPreferences));
    initMsgs();
}

From source file:com.diskoverorta.legal.LegalManager.java

public String tagLegalTextAnalyticsComponents(String sDoc, Map<String, String> apiConfig) {
    logger.info("tagging legal text analytics components");
    Stopwatch allTimer = Stopwatch.createUnstarted();
    Stopwatch entitiesTimer = Stopwatch.createUnstarted();
    Stopwatch ontologyTimer = Stopwatch.createUnstarted();
    allTimer.start();
    Set<String> personEntities = new HashSet<String>();
    Set<String> orgEntities = new HashSet<String>();

    List<String> sentList = m_snlp.splitSentencesINDocument(sDoc);
    String chunkSize = null;
    logger.info("getting chunk size");
    if ((apiConfig != null) && apiConfig.containsKey("chunksize"))
        chunkSize = apiConfig.get("chunksize");
    logger.info("Chunking sentences");
    if (chunkSize != null)
        sentList = chunkSentences(sentList, chunkSize);

    String jsonOutput = "";
    Gson gson = new GsonBuilder().setPrettyPrinting().create();

    Map<String, Set<String>> ontologyTemp = null;

    List<LegalObject> legalcomponents = new ArrayList<LegalObject>();
    for (String temp : sentList) {
        LegalObject legalcomponent = new LegalObject();
        ontologyTimer.start();
        ontologyTemp = m_oManager.getOntologyForSelectedTerms(temp, m_config.ontologyConfig);
        ontologyTimer.stop();
        legalcomponent.sentence = temp;
        entitiesTimer.start();
        legalcomponent.entities = m_eManager.getSelectedEntitiesForSentence(temp, m_config.entityConfig);
        entitiesTimer.stop();
        logger.info("Inserting person entities");
        insertEntity(personEntities, legalcomponent.entities.person);
        logger.info("Inserting OrganiZation entities");
        insertEntity(orgEntities, legalcomponent.entities.organization);
        legalcomponent.events = ontologyTemp.get("Events");
        legalcomponent.topics = ontologyTemp.get("Topics");

        legalcomponents.add(legalcomponent);
    }
    logger.info("getting coref for selected entities and store it in a map");
    Map<String, Map<String, Set<String>>> coref_out = m_coref.getCorefForSelectedEntites(sDoc, personEntities,
            orgEntities, m_config.corefConfig);
    logger.info("getting coref Inverse Map for person entity");

    Map<String, Set<String>> coref_person = Duke.getCoref(personEntities);
    Map<String, Set<String>> coref_org = Duke.getCoref(orgEntities);

    Map<String, String> gpersonCoref = getCorefInvMap(coref_person);
    logger.info("getting coref Inverse Map for Organization entity");
    Map<String, String> gorgCoref = getCorefInvMap(coref_org);

    for (LegalObject temp : legalcomponents) {
        temp.personAlias = getMatchedCoref(gpersonCoref, temp.entities.person);
        temp.orgAlias = getMatchedCoref(gorgCoref, temp.entities.organization);
    }

    jsonOutput = gson.toJson(legalcomponents);
    logger.info("Person Organization took" + entitiesTimer);
    logger.info("Topics and Events took" + ontologyTimer);
    logger.info("Total time taken" + allTimer);
    allTimer.stop();

    return jsonOutput;
}

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file

        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {

        Long term = Long.parseLong(termStr);

        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term

        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows();//term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples,
                        TableUtils.getBigQueryEncodedTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<String> generator = new QueryGenerator<String>(schemaTerms, null);

    // timestamp loop
    do {

        Set<String> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term

        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows();//term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    // timestamp loop
    do {

        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<Callable<Void>> queryTasks = new ArrayList<>();
        List<Callable<Void>> saveTasks = new ArrayList<>();

        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

            Term term = entry.getKey();
            Set<Triple> triples = entry.getValue();

            QuerySubTask queryTask = new QuerySubTask(term, triples, decoratedTable, cloud);
            queryTasks.add(queryTask);

            SaveResultsSubTask saveTask = new SaveResultsSubTask(term, cloud);
            saveTasks.add(saveTask);
        }

        // invoke all the queries in parallel
        this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        this.invokeAll(saveTasks);

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

                Term term = entry.getKey();

                BigInteger rows = term.getRows();

                this.totalBytes = this.totalBytes + term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    log.info("Reasoning for Term: " + term);

                    Set<Triple> schemaTriples = entry.getValue();
                    log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                    List<String> select = GenericRule.getSelect(schemaTriples);

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);

                    productiveTerms.add(term.getTerm());

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping term as no data found: " + term);
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:org.apache.drill.exec.store.parquet.AbstractParquetScanBatchCreator.java

protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRowGroupScan rowGroupScan,
        OperatorContext oContext) throws ExecutionSetupException {
    final ColumnExplorer columnExplorer = new ColumnExplorer(context.getOptions(), rowGroupScan.getColumns());

    if (!columnExplorer.isStarQuery()) {
        rowGroupScan = rowGroupScan.copy(columnExplorer.getTableColumns());
        rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
    }

    AbstractDrillFileSystemManager fsManager = getDrillFileSystemCreator(oContext, context.getOptions());

    // keep footers in a map to avoid re-reading them
    Map<String, ParquetMetadata> footers = new HashMap<>();
    List<RecordReader> readers = new LinkedList<>();
    List<Map<String, String>> implicitColumns = new ArrayList<>();
    Map<String, String> mapWithMaxColumns = new LinkedHashMap<>();
    for (RowGroupReadEntry rowGroup : rowGroupScan.getRowGroupReadEntries()) {
        /*
        Here we could store a map from file names to footers, to prevent re-reading the footer for each row group in a file
        TODO - to prevent reading the footer again in the parquet record reader (it is read earlier in the ParquetStorageEngine)
        we should add more information to the RowGroupInfo that will be populated upon the first read to
        provide the reader with all of the file meta-data it needs
        These fields will be added to the constructor below
        */
        try {
            Stopwatch timer = logger.isTraceEnabled() ? Stopwatch.createUnstarted() : null;
            DrillFileSystem fs = fsManager.get(rowGroupScan.getFsConf(rowGroup), rowGroup.getPath());
            if (!footers.containsKey(rowGroup.getPath())) {
                if (timer != null) {
                    timer.start();
                }

                ParquetMetadata footer = readFooter(fs.getConf(), rowGroup.getPath());
                if (timer != null) {
                    long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
                    logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", "", rowGroup.getPath(), "", 0,
                            0, 0, timeToRead);
                }
                footers.put(rowGroup.getPath(), footer);
            }
            ParquetMetadata footer = footers.get(rowGroup.getPath());

            boolean autoCorrectCorruptDates = rowGroupScan.areCorruptDatesAutoCorrected();
            ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility
                    .detectCorruptDates(footer, rowGroupScan.getColumns(), autoCorrectCorruptDates);
            logger.debug("Contains corrupt dates: {}", containsCorruptDates);

            if (!context.getOptions().getBoolean(ExecConstants.PARQUET_NEW_RECORD_READER)
                    && !isComplex(footer)) {
                readers.add(new ParquetRecordReader(context, rowGroup.getPath(), rowGroup.getRowGroupIndex(),
                        rowGroup.getNumRecordsToRead(), fs,
                        CodecFactory.createDirectCodecFactory(fs.getConf(),
                                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
                        footer, rowGroupScan.getColumns(), containsCorruptDates));
            } else {
                readers.add(new DrillParquetReader(context, footer, rowGroup, columnExplorer.getTableColumns(),
                        fs, containsCorruptDates));
            }

            List<String> partitionValues = rowGroupScan.getPartitionValues(rowGroup);
            Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(rowGroup.getPath(),
                    partitionValues, rowGroupScan.supportsFileImplicitColumns());
            implicitColumns.add(implicitValues);
            if (implicitValues.size() > mapWithMaxColumns.size()) {
                mapWithMaxColumns = implicitValues;
            }

        } catch (IOException e) {
            throw new ExecutionSetupException(e);
        }
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }

    return new ScanBatch(context, oContext, readers, implicitColumns);
}

From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java

@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file

        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {

        Long term = Long.parseLong(termStr);

        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    this.ddLimit = Config.getIntegerProperty(Constants.REASON_DATA_DIRECT_DOWNLOAD_LIMIT, 1_200_000);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term

        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query, new BigDataTable(this.table));
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename(), this.bucket, null, this.ddLimit);
                queryResult.setStats(stats);

            } catch (IOException ioe) {

                log.error("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                throw ioe;
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows();//term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data

                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples,
                        TableUtils.getBigQueryEncodedTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {

                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info(
                        "All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                                + jobId);

            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}

From source file:org.n52.youngs.control.impl.SingleThreadBulkRunner.java

@Override
public Report load(final Sink sink) {
    this.sink = sink;
    Objects.requireNonNull(source); // fail fast if any required collaborator is missing
    Objects.requireNonNull(mapper);
    Objects.requireNonNull(this.sink);

    log.info("Starting harvest from {} to {} with {}", source, this.sink, mapper);
    Report report = new ReportImpl();

    try {
        boolean prepareSink = sink.prepare(mapper.getMapper());
        if (!prepareSink) {
            String msg = "The sink could not be prepared. Stopping load, please check the logs.";
            log.error(msg);
            report.addMessage(msg);
            return report;
        }
    } catch (SinkError e) {
        log.error("Problem preparing sink", e);
        report.addMessage(String.format("Problem preparing sink: %s", e.getMessage()));
        return report;
    }

    final Stopwatch timer = Stopwatch.createStarted();
    long pageStart = startPosition;
    long count = source.getRecordCount();
    final long limit = Math.min(recordsLimit + startPosition, count);

    final Stopwatch sourceTimer = Stopwatch.createUnstarted();
    final Stopwatch mappingTimer = Stopwatch.createUnstarted();
    final Stopwatch sinkTimer = Stopwatch.createUnstarted();
    final Stopwatch currentBulkTimer = Stopwatch.createUnstarted();
    double bulkTimeAvg = 0d;
    long runNumber = 0;

    while (pageStart <= limit) {
        currentBulkTimer.start();

        long recordsLeft = limit - pageStart + 1;
        long size = Math.min(recordsLeft, bulkSize);
        if (size <= 0) {
            break;
        }
        log.info("### [{}] Requesting {} records from {} starting at {}, last requested record will be {} ###",
                runNumber, size, source.getEndpoint(), pageStart, limit);

        try {
            sourceTimer.start();
            Collection<SourceRecord> records = source.getRecords(pageStart, size, report);
            sourceTimer.stop();

            log.debug("Mapping {} retrieved records.", records.size());
            mappingTimer.start();
            List<SinkRecord> mappedRecords = records.stream().map(record -> {
                try {
                    return mapper.map(record);
                } catch (MappingError e) {
                    report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                    return null;
                }
            }).filter(Objects::nonNull).collect(Collectors.toList());
            mappingTimer.stop();

            log.debug("Storing {} mapped records.", mappedRecords.size());
            if (!testRun) {
                sinkTimer.start();
                mappedRecords.forEach(record -> {
                    try {
                        boolean result = sink.store(record);
                        if (result) {
                            report.addSuccessfulRecord(record.getId());
                        } else {
                            report.addFailedRecord(record.getId(), "see sink log");
                        }
                    } catch (SinkError e) {
                        report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                    }
                });
                sinkTimer.stop();
            } else {
                log.info("TESTRUN, created documents are:\n{}", Arrays.toString(mappedRecords.toArray()));
            }

        } catch (RuntimeException e) {
            if (sourceTimer.isRunning()) {
                sourceTimer.stop();
            }
            if (mappingTimer.isRunning()) {
                mappingTimer.stop();
            }
            if (sinkTimer.isRunning()) {
                sinkTimer.stop();
            }

            String msg = String.format("Problem processing records %s to %s: %s", pageStart, pageStart + size,
                    e.getMessage());
            log.error(msg, e);
            report.addMessage(msg);
        }

        pageStart += bulkSize;

        currentBulkTimer.stop();
        bulkTimeAvg = ((bulkTimeAvg * runNumber) + currentBulkTimer.elapsed(TimeUnit.SECONDS))
                / (runNumber + 1);
        updateAndLog(runNumber, (runNumber + 1) * bulkSize, currentBulkTimer.elapsed(TimeUnit.SECONDS),
                bulkTimeAvg);
        currentBulkTimer.reset();

        runNumber++;
    }

    timer.stop();
    log.info("Completed harvesting for {} ({} failed) of {} records in {} minutes",
            report.getNumberOfRecordsAdded(), report.getNumberOfRecordsFailed(), source.getRecordCount(),
            timer.elapsed(TimeUnit.MINUTES));
    log.info("Time spent (minutes): source={}, mapping={}, sink={}", sourceTimer.elapsed(TimeUnit.MINUTES),
            mappingTimer.elapsed(TimeUnit.MINUTES), sinkTimer.elapsed(TimeUnit.MINUTES));

    return report;
}

From source file:gobblin.source.extractor.extract.kafka.KafkaExtractor.java

public KafkaExtractor(WorkUnitState state) {
    super(state);
    this.workUnitState = state;
    this.topicName = KafkaUtils.getTopicName(state);
    this.partitions = KafkaUtils.getPartitions(state);
    this.lowWatermark = state.getWorkunit().getLowWatermark(MultiLongWatermark.class);
    this.highWatermark = state.getWorkunit().getExpectedHighWatermark(MultiLongWatermark.class);
    this.nextWatermark = new MultiLongWatermark(this.lowWatermark);
    this.kafkaConsumerClientResolver = new ClassAliasResolver<>(GobblinKafkaConsumerClientFactory.class);
    try {
        this.kafkaConsumerClient = this.closer.register(this.kafkaConsumerClientResolver
                .resolveClass(state.getProp(KafkaSource.GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS,
                        KafkaSource.DEFAULT_GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS))
                .newInstance().create(ConfigUtils.propertiesToConfig(state.getProperties())));
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new RuntimeException(e);
    }

    this.stopwatch = Stopwatch.createUnstarted();

    this.decodingErrorCount = Maps.newHashMap();
    this.avgMillisPerRecord = Maps.newHashMapWithExpectedSize(this.partitions.size());
    this.avgRecordSizes = Maps.newHashMapWithExpectedSize(this.partitions.size());

    this.errorPartitions = Sets.newHashSet();

    // The actual high watermark starts with the low watermark
    this.workUnitState.setActualHighWatermark(this.lowWatermark);
}