List of usage examples for com.google.common.base Stopwatch start
public Stopwatch start()
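start() begins measuring elapsed time on an unstarted Stopwatch, returns the same instance, and throws IllegalStateException if the stopwatch is already running. Before the project examples below, here is a minimal, self-contained sketch of the typical lifecycle; the class name StopwatchStartExample and the doWork() placeholder are illustrative only and do not come from any of the source files listed.

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class StopwatchStartExample {

    public static void main(String[] args) {
        Stopwatch stopwatch = Stopwatch.createUnstarted();
        stopwatch.start();                      // begin timing
        doWork();                               // placeholder for the code being measured
        stopwatch.stop();                       // stop timing; elapsed time is retained
        System.out.println("Elapsed: " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");

        stopwatch.reset();                      // back to zero and stopped
        stopwatch.start();                      // reuse the same instance for the next measurement
        doWork();
        System.out.println("Second run: " + stopwatch); // toString() prints a human-readable duration
    }

    private static void doWork() {
        // placeholder workload
        for (int i = 0; i < 1_000_000; i++) {
            Math.sqrt(i);
        }
    }
}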
From source file:org.opencb.cellbase.app.transform.VariationParser.java
@Override
public void parse() throws IOException, InterruptedException, SQLException, ClassNotFoundException {
    if (!Files.exists(variationDirectoryPath) || !Files.isDirectory(variationDirectoryPath)
            || !Files.isReadable(variationDirectoryPath)) {
        throw new IOException(
                "Variation directory whether does not exist, is not a directory or cannot be read");
    }
    if (!existsZippedOrUnzippedFile(VARIATION_FILENAME)
            || isEmpty(variationDirectoryPath.resolve(VARIATION_FILENAME).toString())) {
        throw new IOException("variation.txt.gz whether does not exist, is not a directory or cannot be read");
    }

    Variation variation;

    // To speed up calculation a SQLite database is created with the IDs and file offsets,
    // file must be uncompressed for doing this.
    gunzipVariationInputFiles();

    // add idVariation to transcript_variation file
    preprocessInputFiles();

    // Open variation file, this file never gets uncompressed. It's read from gzip file
    BufferedReader bufferedReaderVariation = getBufferedReader(PREPROCESSED_VARIATION_FILENAME);
    // create buffered readers for all other input files
    createVariationFilesBufferedReaders();

    Map<String, String> seqRegionMap = VariationUtils.parseSeqRegionToMap(variationDirectoryPath);
    Map<String, String> sourceMap = VariationUtils.parseSourceToMap(variationDirectoryPath);

    initializeVariationRelatedArrays();

    Stopwatch globalStartwatch = Stopwatch.createStarted();
    Stopwatch batchWatch = Stopwatch.createStarted();
    logger.info("Parsing variation file " + variationDirectoryPath.resolve(PREPROCESSED_VARIATION_FILENAME) + " ...");
    long countprocess = 0;
    String line;
    while ((line = bufferedReaderVariation.readLine()) != null) {
        String[] variationFields = line.split("\t");
        int variationId = Integer.parseInt(variationFields[0]);

        List<String[]> resultVariationFeature = getVariationRelatedFields(VARIATION_FEATURE_FILE_ID, variationId);
        if (resultVariationFeature != null && resultVariationFeature.size() > 0) {
            String[] variationFeatureFields = resultVariationFeature.get(0);

            List<TranscriptVariation> transcriptVariation = getTranscriptVariations(variationId, variationFeatureFields[0]);
            List<Xref> xrefs = getXrefs(sourceMap, variationId);

            try {
                // Preparing the variation alleles
                String[] allelesArray = getAllelesArray(variationFeatureFields);

                // For code sanity save chromosome, start, end and id
                String chromosome = seqRegionMap.get(variationFeatureFields[1]);

                if (!chromosome.contains("PATCH") && !chromosome.contains("HSCHR") && !chromosome.contains("contig")) {
                    int start = (variationFeatureFields != null) ? Integer.valueOf(variationFeatureFields[2]) : 0;
                    int end = (variationFeatureFields != null) ? Integer.valueOf(variationFeatureFields[3]) : 0;
                    String id = (variationFields[2] != null && !variationFields[2].equals("\\N")) ? variationFields[2] : "";
                    String reference = (allelesArray[0] != null && !allelesArray[0].equals("\\N")) ? allelesArray[0] : "";
                    String alternate = (allelesArray[1] != null && !allelesArray[1].equals("\\N")) ? allelesArray[1] : "";

                    // Preparing frequencies
                    //List<PopulationFrequency> populationFrequencies = getPopulationFrequencies(variationId, allelesArray);
                    List<PopulationFrequency> populationFrequencies =
                            getPopulationFrequencies(chromosome, start, end, id, reference, alternate);

                    // TODO: check that variationFeatureFields is always different to null and intergenic-variant is never used
                    //List<String> consequenceTypes = (variationFeatureFields != null) ? Arrays.asList(variationFeatureFields[12].split(",")) : Arrays.asList("intergenic_variant");
                    List<String> consequenceTypes = Arrays.asList(variationFeatureFields[12].split(","));
                    String displayConsequenceType = getDisplayConsequenceType(consequenceTypes);

                    // we have all the necessary to construct the 'variation' object
                    variation = buildVariation(variationFields, variationFeatureFields, chromosome, start, end,
                            id, reference, alternate, transcriptVariation, xrefs, populationFrequencies,
                            allelesArray, consequenceTypes, displayConsequenceType);
                    fileSerializer.serialize(variation, getOutputFileName(chromosome));
                }

                if (++countprocess % 100000 == 0 && countprocess != 0) {
                    logger.info("Processed variations: " + countprocess);
                    logger.debug("Elapsed time processing batch: " + batchWatch);
                    batchWatch.reset();
                    batchWatch.start();
                }
            } catch (Exception e) {
                e.printStackTrace();
                logger.error("Error parsing variation: " + e.getMessage());
                logger.error("Last line processed: " + line);
                break;
            }
        }
        // TODO: just for testing, remove
        //if (countprocess % 100000 == 0) {
        //    break;
        //}
    }
    logger.info("Variation parsing finished");
    logger.info("Variants processed: " + countprocess);
    logger.debug("Elapsed time parsing: " + globalStartwatch);

    gzipVariationFiles(variationDirectoryPath);

    try {
        bufferedReaderVariation.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
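The example above restarts its per-batch stopwatch by calling reset() followed by start() every 100 000 records, while a second stopwatch keeps running for the whole parse. A minimal sketch of that idiom, distilled from the example; the loop body and the batch size here are only placeholders:

import com.google.common.base.Stopwatch;

public class BatchTimingSketch {

    public static void main(String[] args) {
        Stopwatch globalWatch = Stopwatch.createStarted(); // measures the whole run
        Stopwatch batchWatch = Stopwatch.createStarted();  // measures the current batch only

        long count = 0;
        for (int i = 0; i < 1_000_000; i++) {              // stand-in for the record-processing loop
            // ... process one record ...
            if (++count % 100_000 == 0) {
                System.out.println("Processed: " + count + ", batch took " + batchWatch);
                batchWatch.reset();  // elapsed time back to zero; the stopwatch is now stopped
                batchWatch.start();  // start timing the next batch
            }
        }
        System.out.println("Total: " + globalWatch);
    }
}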
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask7.java
@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile, TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {
        Long term = Long.parseLong(termStr);
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);

    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();

        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;

        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults.add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(), queryResult.getFilename());
                queryResult.setStats(stats);
            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();

                QueryStats stats = queryResult.getStats();
                BigInteger rows = stats.getTotalRows();//term.getRows();
                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms, decoratedTable, writer);
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryEncodedTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: " + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: " + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
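The reasoning tasks above use one unstarted stopwatch to accumulate time across many start()/stop() pairs (total reasoning time) and a second one, guarded by isRunning(), to measure how long the task has been in empty cycles. A minimal sketch of that two-stopwatch pattern; the loop, the hasWork flag, and the class name are illustrative stand-ins, not code from the project:

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class AccumulatingStopwatchSketch {

    public static void main(String[] args) {
        Stopwatch workWatch = Stopwatch.createUnstarted(); // accumulates time across all work phases
        Stopwatch idleWatch = Stopwatch.createUnstarted(); // measures the current idle period

        for (int cycle = 0; cycle < 5; cycle++) {
            boolean hasWork = cycle % 2 == 0;              // stand-in for "the query returned rows"
            if (hasWork) {
                workWatch.start();
                // ... do the expensive work ...
                workWatch.stop();                          // elapsed time keeps accumulating across cycles
                idleWatch.reset();                         // forget any previous idle period
            } else if (!idleWatch.isRunning()) {
                idleWatch.start();                         // start() would throw if it were already running
            }
        }
        System.out.println("Work: " + workWatch.elapsed(TimeUnit.MILLISECONDS) + " ms, idle: " + idleWatch);
    }
}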
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask6.java
@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    //String table = metadata.getValue(EcarfMetaData.ECARF_TABLE);
    //Set<String> terms = metadata.getTerms();
    //String schemaFile = metadata.getValue(EcarfMetaData.ECARF_SCHEMA);
    //String bucket = metadata.getBucket();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        //String termsFile = metadata.getValue(EcarfMetaData.ECARF_TERMS_FILE);
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<String, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaNTriples(localSchemaFile, TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String term : termsSet) {
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);

    String instanceId = cloud.getInstanceId();

    QueryGenerator<String> generator = new QueryGenerator<String>(schemaTerms, null);

    // timestamp loop
    do {

        Set<String> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();

        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = Utils.TEMP_FOLDER + instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;

        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query);
            queryResults.add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(), queryResult.getFilename());
                queryResult.setStats(stats);
            } catch (IOException ioe) {
                // transient backend errors
                log.warn("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                //TODO should throw an exception
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();

                QueryStats stats = queryResult.getStats();
                BigInteger rows = stats.getTotalRows();//term.getRows();
                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms, decoratedTable, writer);
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, false);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: " + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: " + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:io.ecarf.core.cloud.task.processor.reason.phase2.DoReasonTask8.java
@Override
public void run() throws IOException {

    GoogleCloudService cloud = (GoogleCloudService) this.getCloudService();

    Stopwatch stopwatch1 = Stopwatch.createUnstarted();
    Stopwatch stopwatch2 = Stopwatch.createUnstarted();

    Set<String> termsSet;

    if (terms == null) {
        // too large, probably saved as a file
        log.info("Using json file for terms: " + termsFile);
        Validate.notNull(termsFile);

        String localTermsFile = Utils.TEMP_FOLDER + termsFile;
        cloud.downloadObjectFromCloudStorage(termsFile, localTermsFile, bucket);

        // convert from JSON
        termsSet = io.cloudex.framework.utils.FileUtils.jsonFileToSet(localTermsFile);
    } else {
        termsSet = ObjectUtils.csvToSet(terms);
    }

    String localSchemaFile = Utils.TEMP_FOLDER + schemaFile;
    // download the file from the cloud storage
    cloud.downloadObjectFromCloudStorage(schemaFile, localSchemaFile, bucket);

    // uncompress if compressed
    if (GzipUtils.isCompressedFilename(schemaFile)) {
        localSchemaFile = GzipUtils.getUncompressedFilename(localSchemaFile);
    }

    Map<Long, Set<Triple>> allSchemaTriples = TripleUtils.getRelevantSchemaETriples(localSchemaFile, TermUtils.RDFS_TBOX);

    // get all the triples we care about
    schemaTerms = new HashMap<>();

    for (String termStr : termsSet) {
        Long term = Long.parseLong(termStr);
        if (allSchemaTriples.containsKey(term)) {
            schemaTerms.put(term, allSchemaTriples.get(term));
        }
    }

    String decoratedTable = table;
    int emptyRetries = 0;
    int totalInferredTriples = 0;
    int maxRetries = Config.getIntegerProperty(Constants.REASON_RETRY_KEY, 6);
    this.ddLimit = Config.getIntegerProperty(Constants.REASON_DATA_DIRECT_DOWNLOAD_LIMIT, 1_200_000);

    String instanceId = cloud.getInstanceId();

    QueryGenerator<Long> generator = new QueryGenerator<Long>(schemaTerms, null);

    // timestamp loop
    do {

        Set<Long> productiveTerms = new HashSet<>();
        int interimInferredTriples = 0;

        // First of all run all the queries asynchronously and remember the jobId and filename for each term
        List<QueryResult> queryResults = new ArrayList<QueryResult>();

        generator.setDecoratedTable(decoratedTable);

        List<String> queries = generator.getQueries();
        log.debug("Generated Queries: " + queries);
        String queryResultFilePrefix = instanceId + '_' + System.currentTimeMillis() + "_QueryResults_";
        int fileCount = 0;

        for (String query : queries) {
            String jobId = cloud.startBigDataQuery(query, new BigDataTable(this.table));
            queryResults.add(QueryResult.create().setFilename(queryResultFilePrefix + fileCount).setJobId(jobId));
            fileCount++;
        }

        // invoke all the queries in parallel
        //this.invokeAll(queryTasks);

        long start = System.currentTimeMillis();

        String inferredTriplesFile = Utils.TEMP_FOLDER + instanceId + '_' + start + Constants.DOT_INF;

        // save all the query results in files in parallel
        //this.invokeAll(saveTasks);

        for (QueryResult queryResult : queryResults) {
            try {
                // block and wait for each job to complete then save results to a file
                QueryStats stats = cloud.saveBigQueryResultsToFile(queryResult.getJobId(), queryResult.getFilename(),
                        this.bucket, null, this.ddLimit);
                queryResult.setStats(stats);
            } catch (IOException ioe) {
                log.error("failed to save query results to file, jobId: " + queryResult.getJobId(), ioe);
                throw ioe;
            }
        }

        try (PrintWriter writer = new PrintWriter(
                new GZIPOutputStream(new FileOutputStream(inferredTriplesFile), Constants.GZIP_BUF_SIZE))) {

            // now loop through the queries
            //for(Entry<Term, Set<Triple>> entry: schemaTerms.entrySet()) {
            for (QueryResult queryResult : queryResults) {

                //Term term = entry.getKey();

                QueryStats stats = queryResult.getStats();
                BigInteger rows = stats.getTotalRows();//term.getRows();
                this.totalBytes = this.totalBytes + stats.getTotalProcessedBytes();//term.getBytes();

                // only process if triples are found matching this term
                if (!BigInteger.ZERO.equals(rows)) {
                    stopwatch1.start();
                    int inferredTriplesCount = this.inferAndSaveTriplesToFile(queryResult, productiveTerms, writer);
                    interimInferredTriples += inferredTriplesCount;
                    this.totalRows = this.totalRows.add(rows);
                    stopwatch1.stop();
                } else {
                    log.info("Skipping query as no data is found");
                }
            }
        }

        totalInferredTriples += interimInferredTriples;

        if (interimInferredTriples > 0) {
            // stream smaller numbers of inferred triples
            // try uploading from cloud storage
            int streamingThreshold = Config.getIntegerProperty("ecarf.io.reasoning.streaming.threshold", 100000);

            log.info("Inserting " + interimInferredTriples + ", inferred triples into Big Data table for "
                    + productiveTerms.size() + " productive terms. Filename: " + inferredTriplesFile);

            if (interimInferredTriples <= streamingThreshold) {
                // stream the data
                Set<Triple> inferredTriples = TripleUtils.loadCompressedCSVTriples(inferredTriplesFile, true);
                log.info("Total triples to stream into Big Data: " + inferredTriples.size());
                cloud.streamObjectsIntoBigData(inferredTriples, TableUtils.getBigQueryEncodedTripleTable(table));
                log.info("All inferred triples are streamed into Big Data table");
            } else {
                // load the data through cloud storage
                // upload the file to cloud storage
                log.info("Uploading inferred triples file into cloud storage: " + inferredTriplesFile);
                StorageObject file = cloud.uploadFileToCloudStorage(inferredTriplesFile, bucket);
                log.info("File " + file + ", uploaded successfully. Now loading it into big data.");

                String jobId = cloud.loadCloudStorageFilesIntoBigData(Lists.newArrayList(file.getUri()),
                        TableUtils.getBigQueryEncodedTripleTable(table), false);

                log.info("All inferred triples are loaded into Big Data table through cloud storage, completed jobId: " + jobId);
            }

            // reset empty retries
            emptyRetries = 0;

            stopwatch2.reset();
        } else {
            log.info("No new inferred triples");
            // increment empty retries
            emptyRetries++;

            if (!stopwatch2.isRunning()) {
                stopwatch2.start();
            }
        }

        log.info("Total inferred triples so far = " + totalInferredTriples + ", current retry count: " + emptyRetries);

        if (emptyRetries < maxRetries) {
            ApiUtils.block(Config.getIntegerProperty(Constants.REASON_SLEEP_KEY, 20));

            // FIXME move into the particular cloud implementation service
            long elapsed = System.currentTimeMillis() - start;
            decoratedTable = "[" + table + "@-" + elapsed + "-]";

            log.info("Using table decorator: " + decoratedTable + ". Empty retries count: " + emptyRetries);
        }

    } while (emptyRetries < maxRetries); // end timestamp loop

    //executor.shutdown();
    log.info("Finished reasoning, total inferred triples = " + totalInferredTriples);
    //log.info("Number of avoided duplicate terms = " + this.duplicates);
    log.info("Total rows retrieved from big data = " + this.totalRows);
    log.info("Total processed GBytes = " + ((double) this.totalBytes / FileUtils.ONE_GB));
    log.info("Total process reasoning time (serialization in inf file) = " + stopwatch1);
    log.info("Total time spent in empty inference cycles = " + stopwatch2);
}
From source file:org.opencb.cellbase.app.transform.variation.VariationParser.java
@Override
public void parse() throws IOException, InterruptedException, SQLException, ClassNotFoundException {
    if (!Files.exists(variationDirectoryPath) || !Files.isDirectory(variationDirectoryPath)
            || !Files.isReadable(variationDirectoryPath)) {
        throw new IOException(
                "Variation directory whether does not exist, is not a directory or cannot be read");
    }
    if (!variationFile.existsZippedOrUnzippedFile() || variationFile.isEmpty()) {
        throw new IOException("variation.txt.gz whether does not exist, is not a directory or cannot be read");
    }

    // add idVariation to transcript_variation file
    preprocessInputFiles();

    // Open variation file, this file never gets uncompressed. It's read from gzip file
    BufferedReader bufferedReaderVariation = variationFile.getBufferedReader();
    // create buffered readers for all other input files
    createVariationFilesReaders();

    Map<String, String> seqRegionMap = VariationUtils.parseSeqRegionToMap(variationDirectoryPath);
    Map<String, String> sourceMap = VariationUtils.parseSourceToMap(variationDirectoryPath);

    Stopwatch globalStartwatch = Stopwatch.createStarted();
    Stopwatch batchWatch = Stopwatch.createStarted();
    logger.info("Parsing variation file {} ...", variationDirectoryPath.resolve(PREPROCESSED_VARIATION_FILENAME));
    long countprocess = 0, incorrectEndVariants = 0, incorrectAllelesVariants = 0;
    String line;
    while ((line = bufferedReaderVariation.readLine()) != null) {
        String[] variationFields = line.split("\t");
        int variationId = Integer.parseInt(variationFields[0]);

        List<String[]> resultVariationFeature = variationFeatureFile.getVariationRelatedLines(variationId);
        if (resultVariationFeature != null && resultVariationFeature.size() > 0) {
            String[] variationFeatureFields = resultVariationFeature.get(0);

            List<TranscriptVariation> transcriptVariation = getTranscriptVariations(variationId, variationFeatureFields[0]);
            List<Xref> xrefs = getXrefs(sourceMap, variationId);

            try {
                // Preparing the variation alleles
                String[] allelesArray = getAllelesArray(variationFeatureFields);
                if (allelesArray == null) {
                    logger.debug("Incorrect allele string: {}", variationFeatureFields[6]);
                    incorrectAllelesVariants++;
                } else {
                    String chromosome = seqRegionMap.get(variationFeatureFields[1]);

                    if (!chromosome.contains("PATCH") && !chromosome.contains("HSCHR") && !chromosome.contains("contig")) {
                        int start = Integer.valueOf(variationFeatureFields[2]);
                        int end = Integer.valueOf(variationFeatureFields[3]);
                        String id = (variationFields[2] != null && !variationFields[2].equals("\\N")) ? variationFields[2] : "";
                        String reference = (allelesArray[0] != null && !allelesArray[0].equals("\\N")) ? allelesArray[0] : "";
                        List<String> alternates = getAlternates(allelesArray);

                        List<String> ids = new LinkedList<>();
                        ids.add(id);

                        List<String> hgvs = getHgvs(transcriptVariation);
                        Map<String, AdditionalAttribute> additionalAttributes = getAdditionalAttributes(
                                variationFields, variationFeatureFields);

                        List<ConsequenceType> conseqTypes = getConsequenceTypes(transcriptVariation);
                        String displayConsequenceTypes = getDisplayConsequenceType(variationFeatureFields);
                        String strand = variationFeatureFields[4];
                        String ancestralAllele = (variationFields[4] != null && !variationFields[4].equals("\\N"))
                                ? variationFields[4] : "";
                        String minorAllele = (variationFeatureFields[16] != null && !variationFeatureFields[16].equals("\\N"))
                                ? variationFeatureFields[16] : "";
                        Float minorAlleleFreq = (variationFeatureFields[17] != null && !variationFeatureFields[17].equals("\\N"))
                                ? Float.parseFloat(variationFeatureFields[17]) : null;

                        // create a variation object for each alternative
                        for (String alternate : alternates) {
                            VariantType type = getVariantType(reference, alternate);
                            if (type == null) {
                                logger.warn("Unrecognized variant type (won't be parsed): {}:{}-{} {}/{}",
                                        chromosome, start, end, reference, alternate);
                            } else if (incorrectStartAndEnd(start, end, reference)) {
                                logger.debug("Incorrect variant start-end pair: {}:{}-{} {}/{}",
                                        chromosome, start, end, reference, alternate);
                                incorrectEndVariants++;
                            } else {
                                // build and serialize variant
                                Variant variation = buildVariant(chromosome, start, end, reference, alternate, type,
                                        ids, hgvs, additionalAttributes, displayConsequenceTypes, conseqTypes, id,
                                        xrefs, strand, ancestralAllele, minorAllele, minorAlleleFreq);
                                fileSerializer.serialize(variation, getOutputFileName(chromosome));
                            }
                            countprocess++;
                        }
                    }
                }

                if (countprocess % 100000 == 0 && countprocess != 0) {
                    logger.info("Processed variations: {}", countprocess);
                    logger.debug("Elapsed time processing batch: {}", batchWatch);
                    batchWatch.reset();
                    batchWatch.start();
                }
            } catch (Exception e) {
                e.printStackTrace();
                logger.error("Error parsing variation: {}", e.getMessage());
                logger.error("Last line processed: {}", line);
                break;
            }
        }
        // // TODO: just for testing, remove
        // if (countprocess % 1000000 == 0) {
        //     break;
        // }
    }
    serializer.close();
    logger.info("Variation parsing finished");
    logger.info("Variants processed: {}", countprocess);
    logger.info("Variants not parsed due to incorrect start-end: {}", incorrectEndVariants);
    logger.info("Variants not parsed due to incorrect alleles: {}", incorrectAllelesVariants);
    logger.debug("Elapsed time parsing: {}", globalStartwatch);

    gzipVariationFiles(variationDirectoryPath);

    try {
        bufferedReaderVariation.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.thinkbiganalytics.feedmgr.service.feed.DefaultFeedManagerFeedService.java
/**
 * Create/Update a Feed in NiFi. Save the metadata to Kylo meta store.
 *
 * @param feedMetadata the feed metadata
 * @return an object indicating if the feed creation was successful or not
 */
private NifiFeed createAndSaveFeed(FeedMetadata feedMetadata) {
    Stopwatch stopwatch = Stopwatch.createStarted();
    NifiFeed feed = null;
    if (StringUtils.isBlank(feedMetadata.getId())) {
        feedMetadata.setIsNew(true);
        //If the feed is New we need to ensure the user has CREATE_FEED entity permission
        if (accessController.isEntityAccessControlled()) {
            metadataAccess.read(() -> {
                //ensure the user has rights to create feeds under the category
                Category domainCategory = categoryProvider
                        .findById(categoryProvider.resolveId(feedMetadata.getCategory().getId()));
                if (domainCategory == null) {
                    //throw exception
                    throw new MetadataRepositoryException(
                            "Unable to find the category " + feedMetadata.getCategory().getSystemName());
                }
                domainCategory.getAllowedActions().checkPermission(CategoryAccessControl.CREATE_FEED);

                //ensure the user has rights to create feeds using the template
                FeedManagerTemplate domainTemplate = templateProvider
                        .findById(templateProvider.resolveId(feedMetadata.getTemplateId()));
                if (domainTemplate == null) {
                    throw new MetadataRepositoryException(
                            "Unable to find the template " + feedMetadata.getTemplateId());
                }
                // domainTemplate.getAllowedActions().checkPermission(TemplateAccessControl.CREATE_FEED);
            });
        }
    } else if (accessController.isEntityAccessControlled()) {
        metadataAccess.read(() -> {
            //perform explict entity access check here as we dont want to modify the NiFi flow unless user has access to edit the feed
            Feed.ID domainId = feedProvider.resolveId(feedMetadata.getId());
            Feed domainFeed = feedProvider.findById(domainId);
            if (domainFeed != null) {
                domainFeed.getAllowedActions().checkPermission(FeedAccessControl.EDIT_DETAILS);
            } else {
                throw new NotFoundException("Feed not found for id " + feedMetadata.getId());
            }
        });
    }

    //replace expressions with values
    if (feedMetadata.getTable() != null) {
        feedMetadata.getTable().updateMetadataFieldValues();
    }

    if (feedMetadata.getProperties() == null) {
        feedMetadata.setProperties(new ArrayList<NifiProperty>());
    }

    //store ref to the originalFeedProperties before resolving and merging with the template
    List<NifiProperty> orignialFeedProperties = feedMetadata.getProperties();

    //get all the properties for the metadata
    RegisteredTemplate registeredTemplate = registeredTemplateService
            .findRegisteredTemplate(new RegisteredTemplateRequest.Builder()
                    .templateId(feedMetadata.getTemplateId()).templateName(feedMetadata.getTemplateName())
                    .isFeedEdit(true).includeSensitiveProperties(true).build());

    //update the template properties with the feedMetadata properties
    List<NifiProperty> matchedProperties = NifiPropertyUtil.matchAndSetPropertyByProcessorName(
            registeredTemplate.getProperties(), feedMetadata.getProperties(),
            NifiPropertyUtil.PROPERTY_MATCH_AND_UPDATE_MODE.UPDATE_ALL_PROPERTIES);

    feedMetadata.setProperties(registeredTemplate.getProperties());
    feedMetadata.setRegisteredTemplate(registeredTemplate);

    //resolve any ${metadata.} properties
    List<NifiProperty> resolvedProperties = propertyExpressionResolver.resolvePropertyExpressions(feedMetadata);

    /*
    //store all input related properties as well
    List<NifiProperty> inputProperties = NifiPropertyUtil
        .findInputProperties(registeredTemplate.getProperties());

    ///store only those matched and resolved in the final metadata store
    Set<NifiProperty> updatedProperties = new HashSet<>();
    //first get all those selected properties where the value differs from the template value
    List<NifiProperty> modifiedProperties = registeredTemplate.findModifiedDefaultProperties();
    if (modifiedProperties != null) {
        propertyExpressionResolver.resolvePropertyExpressions(modifiedProperties,feedMetadata);
        updatedProperties.addAll(modifiedProperties);
    }
    updatedProperties.addAll(matchedProperties);
    updatedProperties.addAll(resolvedProperties);
    updatedProperties.addAll(inputProperties);
    feedMetadata.setProperties(new ArrayList<NifiProperty>(updatedProperties));
    */

    //decrypt the metadata
    feedModelTransform.decryptSensitivePropertyValues(feedMetadata);

    FeedMetadata.STATE state = FeedMetadata.STATE.NEW;
    try {
        state = FeedMetadata.STATE.valueOf(feedMetadata.getState());
    } catch (Exception e) {
        //if the string isnt valid, disregard as it will end up disabling the feed.
    }

    boolean enabled = (FeedMetadata.STATE.NEW.equals(state) && feedMetadata.isActive())
            || FeedMetadata.STATE.ENABLED.equals(state);

    // flag to indicate to enable the feed later
    //if this is the first time for this feed and it is set to be enabled, mark it to be enabled after we commit to the JCR store
    boolean enableLater = false;
    if (enabled && feedMetadata.isNew()) {
        enableLater = true;
        enabled = false;
        feedMetadata.setState(FeedMetadata.STATE.DISABLED.name());
    }

    CreateFeedBuilder feedBuilder = CreateFeedBuilder
            .newFeed(nifiRestClient, nifiFlowCache, feedMetadata, registeredTemplate.getNifiTemplateId(),
                    propertyExpressionResolver, propertyDescriptorTransform, niFiObjectCache)
            .enabled(enabled).removeInactiveVersionedProcessGroup(removeInactiveNifiVersionedFeedFlows)
            .autoAlign(nifiAutoFeedsAlignAfterSave).withNiFiTemplateCache(niFiTemplateCache);

    if (registeredTemplate.isReusableTemplate()) {
        feedBuilder.setReusableTemplate(true);
        feedMetadata.setIsReusableFeed(true);
    } else {
        feedBuilder.inputProcessorType(feedMetadata.getInputProcessorType())
                .feedSchedule(feedMetadata.getSchedule()).properties(feedMetadata.getProperties());
        if (registeredTemplate.usesReusableTemplate()) {
            for (ReusableTemplateConnectionInfo connection : registeredTemplate.getReusableTemplateConnections()) {
                feedBuilder.addInputOutputPort(new InputOutputPort(
                        connection.getReusableTemplateInputPortName(), connection.getFeedOutputPortName()));
            }
        }
    }

    stopwatch.stop();
    log.debug("Time to prepare data for saving feed in NiFi: {} ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    stopwatch.reset();
    stopwatch.start();

    NifiProcessGroup entity = feedBuilder.build();

    stopwatch.stop();
    log.debug("Time to save feed in NiFi: {} ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    stopwatch.reset();

    feed = new NifiFeed(feedMetadata, entity);
    //set the original feedProperties back to the feed
    feedMetadata.setProperties(orignialFeedProperties);
    //encrypt the metadata properties
    feedModelTransform.encryptSensitivePropertyValues(feedMetadata);

    if (entity.isSuccess()) {
        feedMetadata.setNifiProcessGroupId(entity.getProcessGroupEntity().getId());

        try {
            stopwatch.start();
            saveFeed(feedMetadata);
            feed.setEnableAfterSave(enableLater);
            feed.setSuccess(true);
            stopwatch.stop();
            log.debug("Time to saveFeed in Kylo: {} ms", stopwatch.elapsed(TimeUnit.MILLISECONDS));
            stopwatch.reset();
            stopwatch.start();
            feedBuilder.checkAndRemoveVersionedProcessGroup();
        } catch (Exception e) {
            feed.setSuccess(false);
            feed.addErrorMessage(e);
        }
    } else {
        feed.setSuccess(false);
    }

    if (!feed.isSuccess()) {
        if (!entity.isRolledBack()) {
            try {
                feedBuilder.rollback();
            } catch (FeedRollbackException rollbackException) {
                log.error("Error rolling back feed {}. {} ", feedMetadata.getCategoryAndFeedName(),
                        rollbackException.getMessage());
                feed.addErrorMessage("Error occurred in rolling back the Feed.");
            }
            entity.setRolledBack(true);
        }
    }
    return feed;
}
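The Kylo example above times each phase (prepare, build in NiFi, save in Kylo) with a single stopwatch by stopping it, reading the elapsed time, then calling reset() and start() before the next phase. A minimal sketch of that phase-timing pattern; the prepare() and save() methods and the class name are placeholders, not Kylo APIs:

import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;

public class PhaseTimingSketch {

    public static void main(String[] args) {
        Stopwatch stopwatch = Stopwatch.createStarted();

        prepare();                                          // phase 1 (placeholder work)
        stopwatch.stop();
        System.out.println("Prepare took " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");

        stopwatch.reset();                                  // reuse the same stopwatch for the next phase
        stopwatch.start();
        save();                                             // phase 2 (placeholder work)
        stopwatch.stop();
        System.out.println("Save took " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
    }

    private static void prepare() { /* placeholder */ }

    private static void save() { /* placeholder */ }
}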