List of usage examples for com.google.common.base Stopwatch createUnstarted
@CheckReturnValue public static Stopwatch createUnstarted()
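Before the examples from real projects, here is a minimal, self-contained sketch (not taken from any of the source files below) of the typical lifecycle: create the stopwatch unstarted, then start, read, stop, and reset it explicitly.

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class CreateUnstartedExample {
    public static void main(String[] args) throws InterruptedException {
        Stopwatch watch = Stopwatch.createUnstarted();          // not timing yet
        watch.start();                                          // begin timing
        Thread.sleep(100);                                      // the work being measured
        long ms = watch.elapsed(TimeUnit.MILLISECONDS);         // can be read while running
        watch.stop();                                           // stop timing; elapsed time is retained
        System.out.println("took about " + ms + " ms, final: " + watch);
        watch.reset();                                          // back to unstarted with zero elapsed time
    }
}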
From source file:es.usc.citius.composit.cli.command.CompositionCommand.java
private void benchmark(ComposIT<Concept, Boolean> composit, WSCTest.Dataset dataset, int cycles) {
    // Compute benchmark
    String bestSample = null;
    Stopwatch watch = Stopwatch.createUnstarted();
    long minMS = Long.MAX_VALUE;
    for (int i = 0; i < cycles; i++) {
        System.out.println("[ComposIT Search] Starting benchmark cycle " + (i + 1));
        watch.start();
        composit.search(dataset.getRequest());
        long ms = watch.stop().elapsed(TimeUnit.MILLISECONDS);
        if (ms < minMS) {
            minMS = ms;
        }
        watch.reset();
        if (cli.isMetrics()) {
            cli.println(" > Metrics: ");
            cli.println(" METRICS NOT IMPLEMENTED");
        }
    }
    System.out.println(
            "[Benchmark Result] " + cycles + "-cycle benchmark completed. Best time: " + minMS + " ms.");
    if (cli.isMetrics() && bestSample != null) {
        cli.println("Best sample: " + bestSample);
    }
}
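The cycle pattern above (start, measure, stop, reset, keep the minimum) can be distilled into a small helper. This is an illustrative sketch, not part of the ComposIT code, and it assumes the same Stopwatch and TimeUnit imports as the example above.

static long bestTimeMillis(Runnable task, int cycles) {
    Stopwatch watch = Stopwatch.createUnstarted();
    long best = Long.MAX_VALUE;
    for (int i = 0; i < cycles; i++) {
        watch.start();
        task.run();                                                      // the work being benchmarked
        best = Math.min(best, watch.stop().elapsed(TimeUnit.MILLISECONDS));
        watch.reset();                                                   // required before the next start()
    }
    return best;
}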
From source file:org.opendaylight.protocol.bgp.rib.impl.BGPSessionStats.java
public BGPSessionStats(final Open remoteOpen, final int holdTimerValue, final int keepAlive,
        final Channel channel, final Optional<BGPSessionPreferences> localPreferences,
        final Collection<BgpTableType> tableTypes) {
    this.sessionStopwatch = Stopwatch.createUnstarted();
    this.stats = new BgpSessionState();

    this.stats.setHoldtimeCurrent(holdTimerValue);
    this.stats.setKeepaliveCurrent(keepAlive);
    this.stats.setPeerPreferences(setPeerPref(remoteOpen, channel, tableTypes));
    this.stats.setSpeakerPreferences(setSpeakerPref(channel, localPreferences));
    initMsgs();
}
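The constructor above only creates the stopwatch; it is started later, once the session is actually up. A minimal sketch of that idiom follows (class and method names are illustrative, not from the OpenDaylight sources, and the Stopwatch and TimeUnit imports are assumed).

class SessionTimer {
    private final Stopwatch sessionStopwatch = Stopwatch.createUnstarted();

    void onSessionUp() {
        sessionStopwatch.start();                     // begin measuring the session duration
    }

    long sessionUpSeconds() {
        return sessionStopwatch.elapsed(TimeUnit.SECONDS);
    }
}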
From source file:com.diskoverorta.legal.LegalManager.java
public String tagLegalTextAnalyticsComponents(String sDoc, Map<String, String> apiConfig) {
    logger.info("tagging legal text analytics components");
    Stopwatch allTimer = Stopwatch.createUnstarted();
    Stopwatch entitiesTimer = Stopwatch.createUnstarted();
    Stopwatch ontologyTimer = Stopwatch.createUnstarted();
    allTimer.start();
    Set<String> personEntities = new HashSet<String>();
    Set<String> orgEntities = new HashSet<String>();
    List<String> sentList = m_snlp.splitSentencesINDocument(sDoc);
    String chunkSize = null;
    logger.info("getting chunk size");
    if ((apiConfig != null) && apiConfig.containsKey("chunksize"))
        chunkSize = apiConfig.get("chunksize");
    logger.info("Chunking sentences");
    if (chunkSize != null)
        sentList = chunkSentences(sentList, chunkSize);
    String jsonOutput = "";
    Gson gson = new GsonBuilder().setPrettyPrinting().create();
    Map<String, Set<String>> ontologyTemp = null;
    List<LegalObject> legalcomponents = new ArrayList<LegalObject>();
    for (String temp : sentList) {
        LegalObject legalcomponent = new LegalObject();
        ontologyTimer.start();
        ontologyTemp = m_oManager.getOntologyForSelectedTerms(temp, m_config.ontologyConfig);
        ontologyTimer.stop();
        legalcomponent.sentence = temp;
        entitiesTimer.start();
        legalcomponent.entities = m_eManager.getSelectedEntitiesForSentence(temp, m_config.entityConfig);
        entitiesTimer.stop();
        logger.info("Inserting person entities");
        insertEntity(personEntities, legalcomponent.entities.person);
        logger.info("Inserting Organization entities");
        insertEntity(orgEntities, legalcomponent.entities.organization);
        legalcomponent.events = ontologyTemp.get("Events");
        legalcomponent.topics = ontologyTemp.get("Topics");
        legalcomponents.add(legalcomponent);
    }
    logger.info("getting coref for selected entities and store it in a map");
    Map<String, Map<String, Set<String>>> coref_out = m_coref.getCorefForSelectedEntites(sDoc, personEntities,
            orgEntities, m_config.corefConfig);
    logger.info("getting coref Inverse Map for person entity");
    Map<String, Set<String>> coref_person = Duke.getCoref(personEntities);
    Map<String, Set<String>> coref_org = Duke.getCoref(orgEntities);
    Map<String, String> gpersonCoref = getCorefInvMap(coref_person);
    logger.info("getting coref Inverse Map for Organization entity");
    Map<String, String> gorgCoref = getCorefInvMap(coref_org);
    for (LegalObject temp : legalcomponents) {
        temp.personAlias = getMatchedCoref(gpersonCoref, temp.entities.person);
        temp.orgAlias = getMatchedCoref(gorgCoref, temp.entities.organization);
    }
    jsonOutput = gson.toJson(legalcomponents);
    logger.info("Person Organization took " + entitiesTimer);
    logger.info("Topics and Events took " + ontologyTimer);
    logger.info("Total time taken " + allTimer);
    allTimer.stop();
    return jsonOutput;
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {
        Long term = Long.parseLong(termStr);
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();

        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);

        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows(); //term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes(); //term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());

                cloud.streamObjectsIntoBigData(inferredTriples,
                        TableUtils.getBigQueryEncodedTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
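The DoReasonTask variants in this list all share the same two-stopwatch accounting: one stopwatch accumulates the time spent doing actual reasoning work, while the other measures how long the loop has gone without producing anything and is reset as soon as work appears. A condensed, hypothetical sketch of that idiom (the doOneCycle helper is a placeholder, not part of the ecarf code):

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

class RetryLoopTimingExample {
    public static void main(String[] args) {
        Stopwatch workTimer = Stopwatch.createUnstarted(); // accumulates time spent on productive work
        Stopwatch idleTimer = Stopwatch.createUnstarted(); // measures the current run of empty cycles
        int emptyRetries = 0;
        int maxRetries = 6;
        do {
            workTimer.start();
            int produced = doOneCycle();                   // hypothetical unit of work
            workTimer.stop();
            if (produced > 0) {
                emptyRetries = 0;
                idleTimer.reset();                         // back to zero and unstarted
            } else {
                emptyRetries++;
                if (!idleTimer.isRunning()) {              // a stopwatch must not be started twice
                    idleTimer.start();
                }
            }
        } while (emptyRetries < maxRetries);
        System.out.println("work=" + workTimer.elapsed(TimeUnit.SECONDS)
                + " s, trailing idle=" + idleTimer.elapsed(TimeUnit.SECONDS) + " s");
    }

    private static int doOneCycle() {
        return 0; // placeholder; the real tasks query BigQuery and count inferred triples
    }
}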
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<String> generator = new QueryGenerator<String>(schemaTerms, null);

    // timestamp loop
    do {

        Set<String> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();

        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);

        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis()
                + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename());
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows(); //term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes(); //term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            decoratedTable, writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());

                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase1.DoReasonTask5.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    Map<Term, Set<Triple>> schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(new Term(term), allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    String instanceId = cloud.getInstanceId();

    // timestamp loop
    do {

        List<String> productiveTerms = new ArrayList<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<Callable<Void>> queryTasks = new ArrayList<>();
        List<Callable<Void>> saveTasks = new ArrayList<>();

        for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

            Term term = entry.getKey();
            Set<Triple> triples = entry.getValue();

            QuerySubTask queryTask = new QuerySubTask(term, triples, decoratedTable, cloud);
            queryTasks.add(queryTask);

            SaveResultsSubTask saveTask = new SaveResultsSubTask(term, cloud);
            saveTasks.add(saveTask);
        }

        // invoke all the queries in parallel
        this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        this.invokeAll(saveTasks);

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            for (Entry<Term, Set<Triple>> entry : schemaTerms.entrySet()) {

                Term term = entry.getKey();

                BigInteger rows = term.getRows();

                this.totalBytes = this.totalBytes + term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    log.info("Reasoning for Term: " + term);

                    Set<Triple> schemaTriples = entry.getValue();
                    log.info("Schema Triples: " + Joiner.on('\n').join(schemaTriples));

                    List<String> select = GenericRule.getSelect(schemaTriples);

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(term, select, schemaTriples, rows,
                            decoratedTable, writer);

                    productiveTerms.add(term.getTerm());

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping term as no data found: " + term);
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());

                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    executor.shutdown();

    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:org.apache.drill.exec.store.parquet.AbstractParquetScanBatchCreator.java
protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRowGroupScan rowGroupScan,
        OperatorContext oContext) throws ExecutionSetupException {
    final ColumnExplorer columnExplorer = new ColumnExplorer(context.getOptions(), rowGroupScan.getColumns());

    if (!columnExplorer.isStarQuery()) {
        rowGroupScan = rowGroupScan.copy(columnExplorer.getTableColumns());
        rowGroupScan.setOperatorId(rowGroupScan.getOperatorId());
    }

    AbstractDrillFileSystemManager fsManager = getDrillFileSystemCreator(oContext, context.getOptions());

    // keep footers in a map to avoid re-reading them
    Map<String, ParquetMetadata> footers = new HashMap<>();
    List<RecordReader> readers = new LinkedList<>();
    List<Map<String, String>> implicitColumns = new ArrayList<>();
    Map<String, String> mapWithMaxColumns = new LinkedHashMap<>();
    for (RowGroupReadEntry rowGroup : rowGroupScan.getRowGroupReadEntries()) {
        /*
        Here we could store a map from file names to footers, to prevent re-reading the footer for each row group in a file
        TODO - to prevent reading the footer again in the parquet record reader (it is read earlier in the ParquetStorageEngine)
        we should add more information to the RowGroupInfo that will be populated upon the first read to
        provide the reader with all of the file meta-data it needs
        These fields will be added to the constructor below
        */
        try {
            Stopwatch timer = logger.isTraceEnabled() ? Stopwatch.createUnstarted() : null;
            DrillFileSystem fs = fsManager.get(rowGroupScan.getFsConf(rowGroup), rowGroup.getPath());
            if (!footers.containsKey(rowGroup.getPath())) {
                if (timer != null) {
                    timer.start();
                }

                ParquetMetadata footer = readFooter(fs.getConf(), rowGroup.getPath());
                if (timer != null) {
                    long timeToRead = timer.elapsed(TimeUnit.MICROSECONDS);
                    logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", "", rowGroup.getPath(), "", 0,
                            0, 0, timeToRead);
                }
                footers.put(rowGroup.getPath(), footer);
            }
            ParquetMetadata footer = footers.get(rowGroup.getPath());

            boolean autoCorrectCorruptDates = rowGroupScan.areCorruptDatesAutoCorrected();
            ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility
                    .detectCorruptDates(footer, rowGroupScan.getColumns(), autoCorrectCorruptDates);
            logger.debug("Contains corrupt dates: {}", containsCorruptDates);

            if (!context.getOptions().getBoolean(ExecConstants.PARQUET_NEW_RECORD_READER) && !isComplex(footer)) {
                readers.add(new ParquetRecordReader(context, rowGroup.getPath(), rowGroup.getRowGroupIndex(),
                        rowGroup.getNumRecordsToRead(), fs,
                        CodecFactory.createDirectCodecFactory(fs.getConf(),
                                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
                        footer, rowGroupScan.getColumns(), containsCorruptDates));
            } else {
                readers.add(new DrillParquetReader(context, footer, rowGroup, columnExplorer.getTableColumns(),
                        fs, containsCorruptDates));
            }

            List<String> partitionValues = rowGroupScan.getPartitionValues(rowGroup);
            Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(rowGroup.getPath(),
                    partitionValues, rowGroupScan.supportsFileImplicitColumns());
            implicitColumns.add(implicitValues);
            if (implicitValues.size() > mapWithMaxColumns.size()) {
                mapWithMaxColumns = implicitValues;
            }

        } catch (IOException e) {
            throw new ExecutionSetupException(e);
        }
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }

    return new ScanBatch(context, oContext, readers, implicitColumns);
}
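The Drill example above allocates the stopwatch only when trace logging is enabled, so timing costs nothing otherwise. A self-contained sketch of that guarded-timer idiom, assuming an SLF4J logger (the class and work placeholder are illustrative):

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class GuardedTimingExample {
    private static final Logger logger = LoggerFactory.getLogger(GuardedTimingExample.class);

    void readWithOptionalTiming() {
        // Allocate the stopwatch only if the measurement will actually be logged.
        Stopwatch timer = logger.isTraceEnabled() ? Stopwatch.createUnstarted() : null;
        if (timer != null) {
            timer.start();
        }
        // ... the work being measured (e.g. reading a Parquet footer) ...
        if (timer != null) {
            logger.trace("work took {} us", timer.elapsed(TimeUnit.MICROSECONDS));
        }
    }
}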
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java
@Override
public void run() throws IOException {
    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);

    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile,
            TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {
        Long term = Long.parseLong(termStr);
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    this.ddLimit = Config.getIntegerProperty(Constants.REASON_DATA_DIRECT_DOWNLOAD_LIMIT, 1_200_000);
    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();

        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);

        String queryResultFilePrefix = instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;
        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query, new BigDataTable(this.table));
            queryResults
                    .add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(),
                        queryResult.getFilename(), this.bucket, null, this.ddLimit);
                queryResult.setStats(stats);

            } catch (IOException ioe) {
                log.error("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                throw ioe;
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();
                QueryStats stats = queryResult.getStats();

                BigInteger rows = stats.getTotalRows(); //term.getRows();

                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes(); //term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {

                    stopwatch1.start();

                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms,
                            writer);

                    interimInferredTriples += inferredTriplesCount;

                    this.totalRows = this.totalRows.add(rows);

                    stopwatch1.stop();

                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {

            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold",
                    100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());

                cloud.streamObjectsIntoBigData(inferredTriples,
                        TableUtils.getBigQueryEncodedTripleTable(table));

                log.info("All inferred triples are streamed into Big Data table");

            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: "
                        + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();

        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: "
                + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:org.n52.youngs.control.impl.SingleThreadBulkRunner.java
@Override
public Report load(final Sink sink) {
    this.sink = sink;
    Objects.nonNull(source);
    Objects.nonNull(mapper);
    Objects.nonNull(this.sink);

    log.info("Starting harvest from {} to {} with {}", source, this.sink, mapper);
    Report report = new ReportImpl();

    try {
        boolean prepareSink = sink.prepare(mapper.getMapper());
        if (!prepareSink) {
            String msg = "The sink could not be prepared. Stopping load, please check the logs.";
            log.error(msg);
            report.addMessage(msg);
            return report;
        }
    } catch (SinkError e) {
        log.error("Problem preparing sink", e);
        report.addMessage(String.format("Problem preparing sink: %s", e.getMessage()));
        return report;
    }

    final Stopwatch timer = Stopwatch.createStarted();
    long pageStart = startPosition;
    long count = source.getRecordCount();
    final long limit = Math.min(recordsLimit + startPosition, count);

    final Stopwatch sourceTimer = Stopwatch.createUnstarted();
    final Stopwatch mappingTimer = Stopwatch.createUnstarted();
    final Stopwatch sinkTimer = Stopwatch.createUnstarted();
    final Stopwatch currentBulkTimer = Stopwatch.createUnstarted();
    double bulkTimeAvg = 0d;
    long runNumber = 0;

    while (pageStart <= limit) {
        currentBulkTimer.start();

        long recordsLeft = limit - pageStart + 1;
        long size = Math.min(recordsLeft, bulkSize);
        if (size <= 0) {
            break;
        }
        log.info("### [{}] Requesting {} records from {} starting at {}, last requested record will be {} ###",
                runNumber, size, source.getEndpoint(), pageStart, limit);

        try {
            sourceTimer.start();
            Collection<SourceRecord> records = source.getRecords(pageStart, size, report);
            sourceTimer.stop();

            log.debug("Mapping {} retrieved records.", records.size());
            mappingTimer.start();
            List<SinkRecord> mappedRecords = records.stream().map(record -> {
                try {
                    return mapper.map(record);
                } catch (MappingError e) {
                    report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                    return null;
                }
            }).filter(Objects::nonNull).collect(Collectors.toList());
            mappingTimer.stop();

            log.debug("Storing {} mapped records.", mappedRecords.size());
            if (!testRun) {
                sinkTimer.start();
                mappedRecords.forEach(record -> {
                    try {
                        boolean result = sink.store(record);
                        if (result) {
                            report.addSuccessfulRecord(record.getId());
                        } else {
                            report.addFailedRecord(record.getId(), "see sink log");
                        }
                    } catch (SinkError e) {
                        report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                    }
                });
                sinkTimer.stop();
            } else {
                log.info("TESTRUN, created documents are:\n{}", Arrays.toString(mappedRecords.toArray()));
            }

        } catch (RuntimeException e) {
            if (sourceTimer.isRunning()) {
                sourceTimer.stop();
            }
            if (mappingTimer.isRunning()) {
                mappingTimer.stop();
            }
            if (sinkTimer.isRunning()) {
                sinkTimer.stop();
            }
            String msg = String.format("Problem processing records %s to %s: %s", pageStart, pageStart + size,
                    e.getMessage());
            log.error(msg, e);
            report.addMessage(msg);
        }

        pageStart += bulkSize;

        currentBulkTimer.stop();
        bulkTimeAvg = ((bulkTimeAvg * runNumber) + currentBulkTimer.elapsed(TimeUnit.SECONDS)) / (runNumber + 1);
        updateAndLog(runNumber, (runNumber + 1) * bulkSize, currentBulkTimer.elapsed(TimeUnit.SECONDS),
                bulkTimeAvg);
        currentBulkTimer.reset();

        runNumber++;
    }

    timer.stop();
    log.info("Completed harvesting for {} ({} failed) of {} records in {} minutes",
            report.getNumberOfRecordsAdded(), report.getNumberOfRecordsFailed(), source.getRecordCount(),
            timer.elapsed(TimeUnit.MINUTES));
    log.info("Time spent (minutes): source={}, mapping={}, sink={}", sourceTimer.elapsed(TimeUnit.MINUTES),
            mappingTimer.elapsed(TimeUnit.MINUTES), sinkTimer.elapsed(TimeUnit.MINUTES));
    return report;
}
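The harvester above uses one unstarted stopwatch per phase (source, mapping, sink, current bulk) so it can attribute total time to each phase at the end, and guards stop() with isRunning() in the error path. A compact, self-contained sketch of that phase-accounting pattern (phase names and the loop body are illustrative, not from the n52 youngs code):

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

class PhaseTimingExample {
    public static void main(String[] args) {
        Stopwatch fetchTimer = Stopwatch.createUnstarted();
        Stopwatch storeTimer = Stopwatch.createUnstarted();
        for (int i = 0; i < 3; i++) {
            try {
                fetchTimer.start();
                // ... fetch a batch ...
                fetchTimer.stop();

                storeTimer.start();
                // ... store the batch ...
                storeTimer.stop();
            } catch (RuntimeException e) {
                // a stopwatch must not be stopped twice, so guard with isRunning()
                if (fetchTimer.isRunning()) { fetchTimer.stop(); }
                if (storeTimer.isRunning()) { storeTimer.stop(); }
            }
        }
        System.out.println("fetch=" + fetchTimer.elapsed(TimeUnit.MILLISECONDS) + " ms, store="
                + storeTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
    }
}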
From source file:gobblin.source.extractor.extract.kafka.KafkaExtractor.java
public KafkaExtractor(WorkUnitState state) {
    super(state);
    this.workUnitState = state;
    this.topicName = KafkaUtils.getTopicName(state);
    this.partitions = KafkaUtils.getPartitions(state);
    this.lowWatermark = state.getWorkunit().getLowWatermark(MultiLongWatermark.class);
    this.highWatermark = state.getWorkunit().getExpectedHighWatermark(MultiLongWatermark.class);
    this.nextWatermark = new MultiLongWatermark(this.lowWatermark);

    this.kafkaConsumerClientResolver = new ClassAliasResolver<>(GobblinKafkaConsumerClientFactory.class);
    try {
        this.kafkaConsumerClient = this.closer.register(this.kafkaConsumerClientResolver
                .resolveClass(state.getProp(KafkaSource.GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS,
                        KafkaSource.DEFAULT_GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS))
                .newInstance().create(ConfigUtils.propertiesToConfig(state.getProperties())));
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new RuntimeException(e);
    }

    this.stopwatch = Stopwatch.createUnstarted();

    this.decodingErrorCount = Maps.newHashMap();
    this.avgMillisPerRecord = Maps.newHashMapWithExpectedSize(this.partitions.size());
    this.avgRecordSizes = Maps.newHashMapWithExpectedSize(this.partitions.size());

    this.errorPartitions = Sets.newHashSet();

    // The actual high watermark starts with the low watermark
    this.workUnitState.setActualHighWatermark(this.lowWatermark);
}