List of usage examples for com.google.common.base.Stopwatch.isRunning()

boolean isRunning()

Returns true if start() has been called on this stopwatch, and stop() has not been called since the last call to start().
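Every example below uses isRunning() the same way: to guard a call to stop() on a cleanup or error path, because Stopwatch.stop() throws an IllegalStateException if the stopwatch is not currently running. A minimal, self-contained sketch of that guard pattern (the class name and doWork() are illustrative placeholders, not code from the listings):

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class StopwatchGuardExample {

    public static void main(String[] args) {
        Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            doWork();          // may throw before we ever reach stop()
            stopwatch.stop();
        } finally {
            // stop() on an already-stopped stopwatch throws IllegalStateException,
            // so the cleanup path only stops it if it is still running.
            if (stopwatch.isRunning()) {
                stopwatch.stop();
            }
            System.out.println("work took " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
        }
    }

    private static void doWork() {
        // placeholder for the timed operation
    }
}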
From source file:com.palantir.common.base.PrefetchingBatchingVisitable.java
@Override
public <K extends Exception> boolean batchAccept(final int batchSize, AbortingVisitor<? super List<T>, K> v) throws K {
    final Queue<List<T>> queue = Queues.newArrayDeque();
    final Lock lock = new ReentrantLock();
    final Condition itemAvailable = lock.newCondition();
    final Condition spaceAvailable = lock.newCondition();
    final AtomicBoolean futureIsDone = new AtomicBoolean(false);
    final AtomicReference<Throwable> exception = new AtomicReference<Throwable>();
    final Stopwatch fetchTime = Stopwatch.createUnstarted();
    final Stopwatch fetchBlockedTime = Stopwatch.createUnstarted();
    final Stopwatch visitTime = Stopwatch.createUnstarted();
    final Stopwatch visitBlockedTime = Stopwatch.createUnstarted();
    Future<?> future = exec.submit(new Runnable() {
        @Override
        public void run() {
            try {
                fetchTime.start();
                delegate.batchAccept(batchSize, new AbortingVisitor<List<T>, InterruptedException>() {
                    @Override
                    public boolean visit(List<T> item) throws InterruptedException {
                        fetchTime.stop();
                        fetchBlockedTime.start();
                        lock.lock();
                        try {
                            while (queue.size() >= capacity) {
                                spaceAvailable.await();
                            }
                            fetchBlockedTime.stop();
                            queue.add(item);
                            itemAvailable.signalAll();
                        } finally {
                            lock.unlock();
                        }
                        fetchTime.start();
                        return true;
                    }
                });
                fetchTime.stop();
            } catch (InterruptedException e) {
                // shutting down
            } catch (Throwable t) {
                exception.set(t);
            } finally {
                if (fetchTime.isRunning()) {
                    fetchTime.stop();
                }
                if (fetchBlockedTime.isRunning()) {
                    fetchBlockedTime.stop();
                }
                lock.lock();
                try {
                    futureIsDone.set(true);
                    itemAvailable.signalAll();
                } finally {
                    lock.unlock();
                }
            }
        }
    });
    try {
        while (true) {
            List<T> batch;
            visitBlockedTime.start();
            lock.lock();
            try {
                while (queue.isEmpty()) {
                    if (futureIsDone.get()) {
                        if (exception.get() != null) {
                            throw Throwables.rewrapAndThrowUncheckedException(exception.get());
                        }
                        return true;
                    }
                    itemAvailable.await();
                }
                batch = queue.poll();
                spaceAvailable.signalAll();
            } finally {
                lock.unlock();
            }
            visitBlockedTime.stop();
            visitTime.start();
            boolean proceed = v.visit(batch);
            visitTime.stop();
            if (!proceed) {
                return false;
            }
        }
    } catch (InterruptedException e) {
        throw Throwables.rewrapAndThrowUncheckedException(e);
    } finally {
        log.debug("{} timings: fetch {}, fetchBlocked {}, visit {}, visitBlocked {}",
                name, fetchTime, fetchBlockedTime, visitTime, visitBlockedTime);
        future.cancel(true);
    }
}
From source file:com.elite.tools.soar.NetworkDispatcher.java
@Override
public void run() {
    InnerRequest<?> request;
    while (true) {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // release previous request object to avoid leaking request object when mQueue is drained.
        request = null;
        try {
            // Take a request from the queue.
            request = mQueue.take();
        } catch (InterruptedException e) {
            if (mQuit) {
                return;
            }
            continue;
        }
        try {
            // If the request was cancelled already, do not perform the network request.
            if (request.isCanceled()) {
                request.finish("network-discard-cancelled");
                continue;
            }
            // Perform the network request.
            NetworkResponse networkResponse = mNetwork.performRequest(request);
            // If the server returned 304 and we already delivered a response, we are done.
            if (networkResponse.notModified && request.hasHadResponseDelivered()) {
                request.finish("not-modified");
                continue;
            }
            // Parse the response here on the worker thread.
            InnerResponse<?> response = request.parseNetworkResponse(networkResponse);
            // Write to the cache if applicable.
            // TODO: only update cache metadata instead of the whole entry on 304s.
            if (request.shouldCache() && response.cacheEntry != null) {
                mCache.put(request.getCacheKey(), response.cacheEntry);
            }
            // Post the response back.
            request.markDelivered();
            mDelivery.postResponse(request, response);
        } catch (SoarError soarError) {
            // Deliver the parsed network error.
            soarError.setNetworkTimeMs(stopwatch.stop().elapsed(TimeUnit.MILLISECONDS));
            parseAndDeliverNetworkError(request, soarError);
        } catch (Exception e) {
            // Wrap any other exception in a SoarError and deliver it.
            LOG.error("Unhandled exception {}", e);
            SoarError soarError = new SoarError(e);
            soarError.setNetworkTimeMs(stopwatch.stop().elapsed(TimeUnit.MILLISECONDS));
            mDelivery.postError(request, soarError);
        } finally {
            if (stopwatch.isRunning()) {
                stopwatch.stop();
            }
        }
    }
}
From source file:org.n52.youngs.control.impl.SingleThreadBulkRunner.java
@Override
public Report load(final Sink sink) {
    this.sink = sink;
    Objects.nonNull(source);
    Objects.nonNull(mapper);
    Objects.nonNull(this.sink);
    log.info("Starting harvest from {} to {} with {}", source, this.sink, mapper);
    Report report = new ReportImpl();
    try {
        boolean prepareSink = sink.prepare(mapper.getMapper());
        if (!prepareSink) {
            String msg = "The sink could not be prepared. Stopping load, please check the logs.";
            log.error(msg);
            report.addMessage(msg);
            return report;
        }
    } catch (SinkError e) {
        log.error("Problem preparing sink", e);
        report.addMessage(String.format("Problem preparing sink: %s", e.getMessage()));
        return report;
    }
    final Stopwatch timer = Stopwatch.createStarted();
    long pageStart = startPosition;
    long count = source.getRecordCount();
    final long limit = Math.min(recordsLimit + startPosition, count);
    final Stopwatch sourceTimer = Stopwatch.createUnstarted();
    final Stopwatch mappingTimer = Stopwatch.createUnstarted();
    final Stopwatch sinkTimer = Stopwatch.createUnstarted();
    final Stopwatch currentBulkTimer = Stopwatch.createUnstarted();
    double bulkTimeAvg = 0d;
    long runNumber = 0;
    while (pageStart <= limit) {
        currentBulkTimer.start();
        long recordsLeft = limit - pageStart + 1;
        long size = Math.min(recordsLeft, bulkSize);
        if (size <= 0) {
            break;
        }
        log.info("### [{}] Requesting {} records from {} starting at {}, last requested record will be {} ###",
                runNumber, size, source.getEndpoint(), pageStart, limit);
        try {
            sourceTimer.start();
            Collection<SourceRecord> records = source.getRecords(pageStart, size, report);
            sourceTimer.stop();
            log.debug("Mapping {} retrieved records.", records.size());
            mappingTimer.start();
            List<SinkRecord> mappedRecords = records.stream().map(record -> {
                try {
                    return mapper.map(record);
                } catch (MappingError e) {
                    report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                    return null;
                }
            }).filter(Objects::nonNull).collect(Collectors.toList());
            mappingTimer.stop();
            log.debug("Storing {} mapped records.", mappedRecords.size());
            if (!testRun) {
                sinkTimer.start();
                mappedRecords.forEach(record -> {
                    try {
                        boolean result = sink.store(record);
                        if (result) {
                            report.addSuccessfulRecord(record.getId());
                        } else {
                            report.addFailedRecord(record.getId(), "see sink log");
                        }
                    } catch (SinkError e) {
                        report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                    }
                });
                sinkTimer.stop();
            } else {
                log.info("TESTRUN, created documents are:\n{}", Arrays.toString(mappedRecords.toArray()));
            }
        } catch (RuntimeException e) {
            if (sourceTimer.isRunning()) {
                sourceTimer.stop();
            }
            if (mappingTimer.isRunning()) {
                mappingTimer.stop();
            }
            if (sinkTimer.isRunning()) {
                sinkTimer.stop();
            }
            String msg = String.format("Problem processing records %s to %s: %s", pageStart, pageStart + size,
                    e.getMessage());
            log.error(msg, e);
            report.addMessage(msg);
        }
        pageStart += bulkSize;
        currentBulkTimer.stop();
        bulkTimeAvg = ((bulkTimeAvg * runNumber) + currentBulkTimer.elapsed(TimeUnit.SECONDS)) / (runNumber + 1);
        updateAndLog(runNumber, (runNumber + 1) * bulkSize, currentBulkTimer.elapsed(TimeUnit.SECONDS), bulkTimeAvg);
        currentBulkTimer.reset();
        runNumber++;
    }
    timer.stop();
    log.info("Completed harvesting for {} ({} failed) of {} records in {} minutes",
            report.getNumberOfRecordsAdded(), report.getNumberOfRecordsFailed(), source.getRecordCount(),
            timer.elapsed(TimeUnit.MINUTES));
    log.info("Time spent (minutes): source={}, mapping={}, sink={}", sourceTimer.elapsed(TimeUnit.MINUTES),
            mappingTimer.elapsed(TimeUnit.MINUTES), sinkTimer.elapsed(TimeUnit.MINUTES));
    return report;
}
From source file:org.eclipse.tracecompass.totalads.ui.diagnosis.BackgroundTesting.java
@Override
public void run() {
    String msg = null;
    Stopwatch stopwatch = Stopwatch.createStarted();
    try {
        ProgressConsole console = new ProgressConsole(Messages.BackgroundTesting_ConsoleTitle);
        console.println(Messages.BackgroundTesting_ConsoleStartMessage);
        AlgorithmOutStream outStreamAlg = new AlgorithmOutStream();
        outStreamAlg.addObserver(console);
        File[] fileList = new File(fTestDirectory).listFiles();
        if (fileList == null) {
            throw new TotalADSGeneralException(Messages.BackgroundTesting_NoFiles);
        } else if (fileList.length > 15000) {
            throw new TotalADSGeneralException(Messages.BackgroundTesting_TraceLimit);
        }
        AlgorithmUtility.testModels(fTestDirectory, fTraceReader, fModels, outStreamAlg, this);
        fResultsAndFeedback.setTotalTraceCount(Integer.toString(fileList.length));
        stopwatch.stop();
        Long elapsedMins = stopwatch.elapsed(TimeUnit.MINUTES);
        Long elapsedSecs = stopwatch.elapsed(TimeUnit.SECONDS);
        console.println(NLS.bind(Messages.BackgroundTesting_TotalTime, elapsedMins.toString(), elapsedSecs));
    } catch (TotalADSGeneralException ex) {
        // Handle UI exceptions here; they are simply notifications, so no need to log them
        if (ex.getMessage() == null) {
            msg = Messages.BackgroundTesting_GeneralException;
        } else {
            msg = ex.getMessage();
        }
    } catch (TotalADSDBMSException ex) {
        // Handle IDataAccessObject exceptions here
        if (ex.getMessage() == null) {
            msg = Messages.BackgroundTesting_CommonException;
        } else {
            msg = Messages.BackgroundTesting_DBMSException + ex.getMessage();
        }
        Logger.getLogger(BackgroundModeling.class.getName()).log(Level.WARNING, msg, ex);
    } catch (TotalADSReaderException ex) {
        // Handle Reader exceptions here
        if (ex.getMessage() == null) {
            msg = Messages.BackgroundTesting_CommonException;
        } else {
            msg = Messages.BackgroundTesting_ReaderException + ex.getMessage();
        }
        Logger.getLogger(BackgroundModeling.class.getName()).log(Level.WARNING, msg, ex);
    } catch (Exception ex) {
        // Handle all other exceptions here and log them too
        if (ex.getMessage() == null) {
            msg = Messages.BackgroundTesting_CommonException;
        } else {
            msg = ex.getMessage();
        }
        Logger.getLogger(BackgroundTesting.class.getName()).log(Level.SEVERE, msg, ex);
        // An exception could be thrown due to unavailability of the db,
        // so make sure that the connection is not lost.
        DBMSFactory.INSTANCE.verifyConnection();
        // We don't have to worry about exceptions here, as the above function handles
        // all the exceptions and just returns a message. It also initializes the
        // connection info to a correct value. We cannot call it under a
        // ConnectionException block because such an exception is never thrown
        // and Eclipse starts throwing errors.
    } finally {
        final String exception = msg;
        Display.getDefault().syncExec(new Runnable() {
            @Override
            public void run() {
                String msgTitle = "TotalADS"; //$NON-NLS-1$
                if (exception != null) {
                    // If there has been any exception then show its message
                    MessageDialog.openError(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(),
                            msgTitle, exception);
                } else {
                    MessageDialog.openInformation(
                            PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(), msgTitle,
                            Messages.BackgroundTesting_CompletionMessage);
                }
                fBtnAnalysisEvaluateModels.setEnabled(true);
                fBtnStop.setEnabled(false);
            }
        });
        if (stopwatch.isRunning()) {
            stopwatch.stop();
        }
    } // End of finally
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask9.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    this.setup(cloud);
    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries;
    if (this.retries == null) {
        maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    } else {
        maxRetries = this.retries;
    }
    int cycleSleep;
    if (this.sleep == null) {
        cycleSleep = Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20);
    } else {
        cycleSleep = this.sleep;
    }
    this.ddLimit = Config.getIntegerProperty(Constants.REASON_DATA_DIRECT_DOWNLOAD_LIMIT, 1_200_000);
    int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);
    String instanceId = cloud.getInstanceId();
    int processors = Runtime.getRuntime().availableProcessors();
    if (processors > 1) {
        this.executor = Utils.createFixedThreadPool(processors);
    }
    int count = 0;
    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);
    // timestamp loop
    do {
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        generator.setDecoratedTable(decoratedTable);
        String query = generator.getQuery();
        log.debug("Generated Query: " + query);
        String queryResultFilePrefix = instanceId + "_QueryResults_" + count;
        String jobId = cloud.startBigDataQuery(query, new BigDataTable(this.table));
        //QueryResult queryResult = QueryResult.create().setFilename(queryResultFilePrefix).setJobId(jobId);
        long start = System.currentTimeMillis();
        // block and wait for each job to complete then save results to a file
        QueryStats stats = cloud.saveBigQueryResultsToFile(jobId, queryResultFilePrefix, this.bucket, processors,
                this.ddLimit);
        BigInteger rows = stats.getTotalRows();
        this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();
        Set<Long> productiveTerms = new HashSet<>();
        Set<String> inferredTriplesFiles = new HashSet<>();
        int interimInferredTriples = 0;
        // only process if triples are found matching this term
        if ((rows != null) && !BigInteger.ZERO.equals(rows)) {
            stopwatch1.start();
            interimInferredTriples = this.inferAndSaveTriplesToFile(stats, productiveTerms, processors,
                    inferredTriplesFiles);
            this.totalRows = this.totalRows.add(rows);
            stopwatch1.stop();
        } else {
            log.info("Skipping query as no data is found");
        }
        totalInferredTriples += interimInferredTriples;
        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFiles);
            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = new HashSet<>();
                for (String inferredTriplesFile : inferredTriplesFiles) {
                    TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true, inferredTriples);
                }
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryEncodedTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                List<String> cloudStorageFiles = new ArrayList<>();
                // load the data through cloud storage
                // upload the file to cloud storage
                for (String inferredTriplesFile : inferredTriplesFiles) {
                    log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                    StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                    log.info("File " + file + ", uploaded successfully. Now loading it into big data.");
                    cloudStorageFiles.add(file.getUri());
                }
                jobId = cloud.loadCloudStorageFilesIntoBigData(cloudStorageFiles,
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }
            // reset empty retries
            emptyRetries = 0;
            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }
        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);
        if (emptyRetries < maxRetries) {
            ApiUtils.block(cycleSleep);
            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";
            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }
        count++;
    } while (emptyRetries < maxRetries); // end timestamp loop
    executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;
    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);
        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);
        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }
    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);
    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }
    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);
    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();
    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }
    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();
    // timestamp loop
    do {
        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<Callable<Void>> queryTasks = new ArrayList<>();
        List<Callable<Void>> saveTasks = new ArrayList<>();
        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
            Term term = entry.getKey();
            Set<Triple> triples = entry.getValue();
            QuerySubTask queryTask = new QuerySubTask(term, triples, decoratedTable, cloud);
            queryTasks.add(queryTask);
            SaveResultsSubTask saveTask = new SaveResultsSubTask(term, cloud);
            saveTasks.add(saveTask);
        }
        // invoke all the queries in parallel
        this.invokeAll(queryTasks);
        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;
        // save all the query results in files in parallel
        this.invokeAll(saveTasks);
        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {
            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {
                Term term = entry.getKey();
                BigInteger rows = term.getRows();
                this.totalBytes = this.totalBytes + term.getBytes();
                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    log.info("Reasoning for Term: " + term);
                    Set<Triple> schemaTriples = entry.getValue();
                    log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));
                    List<String> select = GenericRule.getSelect(schemaTriples);
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);
                    productiveTerms.add(term.getTerm());
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping term as no data found: " + term);
                }
            }
        }
        totalInferredTriples += interimInferredTriples;
        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);
            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);
            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");
                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }
            // reset empty retries
            emptyRetries = 0;
            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }
        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);
        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));
            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";
            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }
    } while (emptyRetries < maxRetries); // end timestamp loop
    executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;
    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);
        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);
        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }
    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);
    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }
    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);
    // get all the triples we care about
    schemaTerms = new HashMap<>();
    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }
    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();
    QueryGenerator<String> generator = new QueryGenerator<String>(schemaTerms, null);
    // timestamp loop
    do {
        Set<String> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);
        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults.add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }
        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);
        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;
        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);
        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);
            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }
        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {
            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {
                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();
                BigInteger rows = stats.getTotalRows();//term.getRows();
                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();
                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }
        totalInferredTriples += interimInferredTriples;
        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);
            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);
            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");
                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);
                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }
            // reset empty retries
            emptyRetries = 0;
            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }
        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);
        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));
            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";
            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }
    } while (emptyRetries < maxRetries); // end timestamp loop
    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;
    if (terms == null) {
        // too large, probably saved as a file
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);
        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);
        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }
    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);
    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }
    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);
    // get all the triples we care about
    schemaTerms = new HashMap<>();
    for (String termStr : termsSet) {
        Long term = Long.parseLong(termStr);
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }
    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();
    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);
    // timestamp loop
    do {
        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);
        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults.add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }
        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);
        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;
        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);
        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);
            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }
        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {
            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {
                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();
                BigInteger rows = stats.getTotalRows();//term.getRows();
                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();
                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }
        totalInferredTriples += interimInferredTriples;
        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);
            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);
            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryEncodedTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");
                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }
            // reset empty retries
            emptyRetries = 0;
            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }
        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);
        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));
            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";
            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }
    } while (emptyRetries < maxRetries); // end timestamp loop
    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();
    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();
    Set<String> termsSet;
    if (terms == null) {
        // too large, probably saved as a file
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);
        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);
        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }
    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);
    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }
    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);
    // get all the triples we care about
    schemaTerms = new HashMap<>();
    for (String termStr : termsSet) {
        Long term = Long.parseLong(termStr);
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }
    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    this.ddLimit = Config.getIntegerProperty(Constants.REASON_DATA_DIRECT_DOWNLOAD_LIMIT, 1_200_000);
    String instanceId = cloud.getInstanceId();
    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);
    // timestamp loop
    do {
        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;
        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();
        generator.setDecoratedTable(decoratedTable);
        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query, new BigDataTable(this.table));
            queryResults.add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }
        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);
        long start = System.currentTimeMillis();
        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;
        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);
        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename(), this.bucket, null, this.ddLimit);
                queryResult.setStats(stats);
            } catch (IOException ioe) {
                log.error("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                throw ioe;
            }
        }
        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {
            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {
                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();
                BigInteger rows = stats.getTotalRows();//term.getRows();
                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();
                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms, writer);
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }
        totalInferredTriples += interimInferredTriples;
        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);
            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);
            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryEncodedTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");
                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);
                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }
            // reset empty retries
            emptyRetries = 0;
            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;
            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }
        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);
        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));
            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";
            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }
    } while (emptyRetries < maxRetries); // end timestamp loop
    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
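The DoReasonTask variants above also share a second idiom: one unstarted stopwatch (stopwatch1) accumulates time over the productive part of each loop iteration, while another (stopwatch2) measures how long the loop has been spinning through empty cycles, with isRunning() guarding start() so it is never called on a stopwatch that is already running. A stripped-down sketch of that loop, assuming a made-up didWork flag in place of the real BigQuery result check (not code from the ecarf tasks):

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class TwoStopwatchLoopExample {

    public static void main(String[] args) throws InterruptedException {
        Stopwatch workTime = Stopwatch.createUnstarted();   // accumulated productive time
        Stopwatch idleTime = Stopwatch.createUnstarted();   // accumulated empty-cycle time
        int emptyCycles = 0;
        final int maxEmptyCycles = 3;

        do {
            boolean didWork = Math.random() > 0.5;          // stand-in for "query returned rows"
            if (didWork) {
                workTime.start();
                Thread.sleep(50);                           // stand-in for the real processing
                workTime.stop();
                emptyCycles = 0;
                idleTime.reset();                           // reset() also stops the stopwatch
            } else {
                emptyCycles++;
                if (!idleTime.isRunning()) {                // start() throws if already running
                    idleTime.start();
                }
            }
        } while (emptyCycles < maxEmptyCycles);

        System.out.println("work: " + workTime.elapsed(TimeUnit.MILLISECONDS) + " ms, idle: " + idleTime);
    }
}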