List of usage examples for java.util.concurrent.atomic AtomicLong set
public final void set(long newValue)
From source file:com.raphfrk.craftproxyclient.gui.CraftProxyGUI.java
public long getCapacity() { final AtomicLong capacity = new AtomicLong(); try {//from w ww . j av a2s .co m SwingUtilities.invokeAndWait(new Runnable() { public void run() { long size; try { size = Long.parseLong(desiredSize.getText()) * 1024 * 1024; } catch (NumberFormatException e) { SwingUtilities.invokeLater(new Runnable() { public void run() { GUIManager.messageBox("Unable to parse desired file cache size, using maximum"); } }); size = Long.MAX_VALUE; } capacity.set(size); } }); } catch (InvocationTargetException e) { return Long.MAX_VALUE; } catch (InterruptedException e) { Thread.currentThread().interrupt(); } return capacity.get(); }
From source file:org.apache.hadoop.hbase.quotas.TestSpaceQuotasWithSnapshots.java
void waitForStableQuotaSize(Connection conn, TableName tn, String ns) throws Exception { // For some stability in the value before proceeding // Helps make sure that we got the actual last value, not some inbetween AtomicLong lastValue = new AtomicLong(-1); AtomicInteger counter = new AtomicInteger(0); TEST_UTIL.waitFor(15_000, 500, new SpaceQuotaSnapshotPredicate(conn, tn, ns) { @Override/*from ww w .j av a 2 s . c o m*/ boolean evaluate(SpaceQuotaSnapshot snapshot) throws Exception { LOG.debug("Last observed size=" + lastValue.get()); if (snapshot.getUsage() == lastValue.get()) { int numMatches = counter.incrementAndGet(); if (numMatches >= 5) { return true; } // Not yet.. return false; } counter.set(0); lastValue.set(snapshot.getUsage()); return false; } }); }
From source file:org.apache.hadoop.hbase.quotas.TestSpaceQuotasWithSnapshots.java
void waitForStableRegionSizeReport(Connection conn, TableName tn) throws Exception { // For some stability in the value before proceeding // Helps make sure that we got the actual last value, not some inbetween AtomicLong lastValue = new AtomicLong(-1); AtomicInteger counter = new AtomicInteger(0); TEST_UTIL.waitFor(15_000, 500, new Predicate<Exception>() { @Override/*from w ww .jav a2 s . com*/ public boolean evaluate() throws Exception { LOG.debug("Last observed size=" + lastValue.get()); long actual = getRegionSizeReportForTable(conn, tn); if (actual == lastValue.get()) { int numMatches = counter.incrementAndGet(); if (numMatches >= 5) { return true; } // Not yet.. return false; } counter.set(0); lastValue.set(actual); return false; } }); }
From source file:org.apache.hadoop.raid.DistBlockIntegrityMonitor.java
public static Job startOneJob(Worker newWorker, Priority pri, Set<String> jobFiles, long detectTime, AtomicLong numFilesSubmitted, AtomicLong lastCheckingTime, long maxPendingJobs) throws IOException, InterruptedException, ClassNotFoundException { if (lastCheckingTime != null) { lastCheckingTime.set(System.currentTimeMillis()); }//from w w w. j a va2 s .co m String startTimeStr = dateFormat.format(new Date()); String jobName = newWorker.JOB_NAME_PREFIX + "." + newWorker.jobCounter + "." + pri + "-pri" + "." + startTimeStr; Job job = null; synchronized (jobFiles) { if (jobFiles.size() == 0) { return null; } newWorker.jobCounter++; synchronized (newWorker.jobIndex) { if (newWorker.jobIndex.size() >= maxPendingJobs) { // full return null; } job = newWorker.startJob(jobName, jobFiles, pri, detectTime); } numFilesSubmitted.addAndGet(jobFiles.size()); jobFiles.clear(); } return job; }
From source file:org.apache.nifi.processors.standard.QueryDatabaseTable.java
@Override public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException { ProcessSession session = sessionFactory.createSession(); final List<FlowFile> resultSetFlowFiles = new ArrayList<>(); final ComponentLog logger = getLogger(); final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class); final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue()); final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue(); final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue(); final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES) .evaluateAttributeExpressions().getValue(); final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger(); final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE) .evaluateAttributeExpressions().asInteger(); final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions().asInteger() : 0;// w ww.j av a 2 s .c o m final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); final Map<String, String> maxValueProperties = getDefaultMaxValueProperties(context.getProperties()); final StateManager stateManager = context.getStateManager(); final StateMap stateMap; try { stateMap = stateManager.getState(Scope.CLUSTER); } catch (final IOException ioe) { getLogger().error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe); context.yield(); return; } // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually // set as the current state map (after the session has been committed) final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap()); //If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) { String maxPropKey = maxProp.getKey().toLowerCase(); String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey); if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) { String newMaxPropValue; // If we can't find the value at the fully-qualified key name, it is possible (under a previous scheme) // the value has been stored under a key that is only the column name. Fall back to check the column name, // but store the new initial max value under the fully-qualified key. if (statePropertyMap.containsKey(maxPropKey)) { newMaxPropValue = statePropertyMap.get(maxPropKey); } else { newMaxPropValue = maxProp.getValue(); } statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue); } } List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? null : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*")); final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, statePropertyMap); final StopWatch stopWatch = new StopWatch(true); final String fragmentIdentifier = UUID.randomUUID().toString(); try (final Connection con = dbcpService.getConnection(); final Statement st = con.createStatement()) { if (fetchSize != null && fetchSize > 0) { try { st.setFetchSize(fetchSize); } catch (SQLException se) { // Not all drivers support this, just log the error (at debug level) and move on logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se); } } String jdbcURL = "DBCPService"; try { DatabaseMetaData databaseMetaData = con.getMetaData(); if (databaseMetaData != null) { jdbcURL = databaseMetaData.getURL(); } } catch (SQLException se) { // Ignore and use default JDBC URL. This shouldn't happen unless the driver doesn't implement getMetaData() properly } final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions() .asTimePeriod(TimeUnit.SECONDS).intValue(); st.setQueryTimeout(queryTimeout); // timeout in seconds try { logger.debug("Executing query {}", new Object[] { selectQuery }); final ResultSet resultSet = st.executeQuery(selectQuery); int fragmentIndex = 0; while (true) { final AtomicLong nrOfRows = new AtomicLong(0L); FlowFile fileToProcess = session.create(); try { fileToProcess = session.write(fileToProcess, out -> { // Max values will be updated in the state property map by the callback final MaxValueResultSetRowCollector maxValCollector = new MaxValueResultSetRowCollector( tableName, statePropertyMap, dbAdapter); try { nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, tableName, maxValCollector, maxRowsPerFlowFile, convertNamesForAvro)); } catch (SQLException | RuntimeException e) { throw new ProcessException( "Error during database query or conversion of records to Avro.", e); } }); } catch (ProcessException e) { // Add flowfile to results before rethrowing so it will be removed from session in outer catch resultSetFlowFiles.add(fileToProcess); throw e; } if (nrOfRows.get() > 0) { // set attribute how many rows were selected fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); fileToProcess = session.putAttribute(fileToProcess, RESULT_TABLENAME, tableName); if (maxRowsPerFlowFile > 0) { fileToProcess = session.putAttribute(fileToProcess, "fragment.identifier", fragmentIdentifier); fileToProcess = session.putAttribute(fileToProcess, "fragment.index", String.valueOf(fragmentIndex)); } logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() }); session.getProvenanceReporter().receive(fileToProcess, jdbcURL, stopWatch.getElapsed(TimeUnit.MILLISECONDS)); resultSetFlowFiles.add(fileToProcess); } else { // If there were no rows returned, don't send the flowfile session.remove(fileToProcess); context.yield(); break; } fragmentIndex++; if (maxFragments > 0 && fragmentIndex >= maxFragments) { break; } } for (int i = 0; i < resultSetFlowFiles.size(); i++) { // Add maximum values as attributes for (Map.Entry<String, String> entry : statePropertyMap.entrySet()) { // Get just the column name from the key String key = entry.getKey(); String colName = key .substring(key.lastIndexOf(NAMESPACE_DELIMITER) + NAMESPACE_DELIMITER.length()); resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "maxvalue." + colName, entry.getValue())); } //set count on all FlowFiles if (maxRowsPerFlowFile > 0) { resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); } } } catch (final SQLException e) { throw e; } session.transfer(resultSetFlowFiles, REL_SUCCESS); } catch (final ProcessException | SQLException e) { logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e }); if (!resultSetFlowFiles.isEmpty()) { session.remove(resultSetFlowFiles); } context.yield(); } finally { session.commit(); try { // Update the state stateManager.setState(statePropertyMap, Scope.CLUSTER); } catch (IOException ioe) { getLogger().error("{} failed to update State Manager, maximum observed values will not be recorded", new Object[] { this, ioe }); } } }
From source file:org.lendingclub.mercator.docker.SwarmScanner.java
public void scanServicesForSwarm(String swarmClusterId) { JsonNode response = getRestClient().getServices(); AtomicLong earlistUpdate = new AtomicLong(Long.MAX_VALUE); AtomicBoolean error = new AtomicBoolean(false); response.forEach(it -> {// w w w .j a va2s . co m try { ObjectNode n = flattenService(it); n.put("swarmClusterId", swarmClusterId); dockerScanner.getNeoRxClient().execCypher( "merge (x:DockerService {serviceId:{serviceId}}) set x+={props}, x.updateTs=timestamp() return x", "serviceId", n.get("serviceId").asText(), "props", n).forEach(svc -> { removeDockerLabels("DockerService", "serviceId", n.get("serviceId").asText(), n, svc); earlistUpdate.set( Math.min(earlistUpdate.get(), svc.path("updateTs").asLong(Long.MAX_VALUE))); }); dockerScanner.getNeoRxClient().execCypher( "match (swarm:DockerSwarm {swarmClusterId:{swarmClusterId}}),(service:DockerService{serviceId:{serviceId}}) merge (swarm)-[x:CONTAINS]->(service) set x.updateTs=timestamp()", "swarmClusterId", swarmClusterId, "serviceId", n.path("serviceId").asText()); } catch (Exception e) { logger.warn("problem updating service", e); error.set(true); } }); if (error.get() == false) { if (earlistUpdate.get() < System.currentTimeMillis()) { dockerScanner.getNeoRxClient().execCypher( "match (x:DockerService) where x.swarmClusterId={swarmClusterId} and x.updateTs<{cutoff} detach delete x", "cutoff", earlistUpdate.get(), "swarmClusterId", swarmClusterId); } } }
From source file:org.deeplearning4j.models.word2vec.Word2Vec.java
private void doIteration(Collection<List<VocabWord>> batch2, final AtomicLong numWordsSoFar, final AtomicLong nextRandom, ActorSystem actorSystem) { final AtomicLong lastReported = new AtomicLong(System.currentTimeMillis()); Parallelization.iterateInParallel(batch2, new Parallelization.RunnableWithParams<List<VocabWord>>() { @Override//from ww w. j a va 2 s. com public void run(List<VocabWord> sentence, Object[] args) { double alpha = Math.max(minLearningRate, Word2Vec.this.alpha.get() * (1 - (1.0 * numWordsSoFar.get() / (double) totalWords))); long now = System.currentTimeMillis(); long diff = Math.abs(now - lastReported.get()); if (numWordsSoFar.get() > 0 && diff > 1000) { lastReported.set(now); log.info("Words so far " + numWordsSoFar.get() + " with alpha at " + alpha); } trainSentence(sentence, nextRandom, alpha); numWordsSoFar.set(numWordsSoFar.get() + sentence.size()); } }, actorSystem); }
From source file:org.apache.nifi.processors.hive.SelectHive_1_1QL.java
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); FlowFile flowfile = null;/*from w w w . j a v a 2s . com*/ // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. // However, if we have no FlowFile and we have connections coming from other Processors, then // we know that we should run only if we have a FlowFile. if (context.hasIncomingConnection()) { if (fileToProcess == null && context.hasNonLoopConnection()) { return; } } final ComponentLog logger = getLogger(); final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE) .asControllerService(Hive_1_1DBCPService.class); final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); List<String> preQueries = getQueries( context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); List<String> postQueries = getQueries( context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); // Source the SQL String hqlStatement; if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess) .getValue(); } else { // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. final StringBuilder queryContents = new StringBuilder(); session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); hqlStatement = queryContents.toString(); } final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess) .asInteger(); final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE) .evaluateAttributeExpressions(fileToProcess).asInteger(); final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0; final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); final StopWatch stopWatch = new StopWatch(true); final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER) .evaluateAttributeExpressions(fileToProcess).getValue(); final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER) .evaluateAttributeExpressions(fileToProcess).getValue(); final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String fragmentIdentifier = UUID.randomUUID().toString(); try (final Connection con = dbcpService .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) { Pair<String, SQLException> failure = executeConfigStatements(con, preQueries); if (failure != null) { // In case of failure, assigning config query to "hqlStatement" to follow current error handling hqlStatement = failure.getLeft(); flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } if (fetchSize != null && fetchSize > 0) { try { st.setFetchSize(fetchSize); } catch (SQLException se) { // Not all drivers support this, just log the error (at debug level) and move on logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se); } } final List<FlowFile> resultSetFlowFiles = new ArrayList<>(); try { logger.debug("Executing query {}", new Object[] { hqlStatement }); if (flowbased) { // Hive JDBC Doesn't Support this yet: // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); // int paramCount = pmd.getParameterCount(); // Alternate way to determine number of params in SQL. int paramCount = StringUtils.countMatches(hqlStatement, "?"); if (paramCount > 0) { setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); } } final ResultSet resultSet; try { resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); } catch (SQLException se) { // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw se; } int fragmentIndex = 0; String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; while (true) { final AtomicLong nrOfRows = new AtomicLong(0L); flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); if (baseFilename == null) { baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); } try { flowfile = session.write(flowfile, out -> { try { if (AVRO.equals(outputFormat)) { nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); } else if (CSV.equals(outputFormat)) { CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); } else { nrOfRows.set(0L); throw new ProcessException("Unsupported output format: " + outputFormat); } } catch (final SQLException | RuntimeException e) { throw new ProcessException("Error during database query or conversion of records.", e); } }); } catch (ProcessException e) { // Add flowfile to results before rethrowing so it will be removed from session in outer catch resultSetFlowFiles.add(flowfile); throw e; } if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { final Map<String, String> attributes = new HashMap<>(); // Set attribute for how many rows were selected attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); try { // Set input/output table names by parsing the query attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); } catch (Exception e) { // If failed to parse the query, just log a warning message, but continue. getLogger().warn("Failed to parse query: {} due to {}", new Object[] { hqlStatement, e }, e); } // Set MIME type on output document and add extension to filename if (AVRO.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro"); } else if (CSV.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv"); } if (maxRowsPerFlowFile > 0) { attributes.put("fragment.identifier", fragmentIdentifier); attributes.put("fragment.index", String.valueOf(fragmentIndex)); } flowfile = session.putAllAttributes(flowfile, attributes); logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() }); if (context.hasIncomingConnection()) { // If the flow file came from an incoming connection, issue a Fetch provenance event session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } else { // If we created a flow file from rows received from Hive, issue a Receive provenance event session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } resultSetFlowFiles.add(flowfile); } else { // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on session.remove(flowfile); if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) { flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1); } break; } fragmentIndex++; if (maxFragments > 0 && fragmentIndex >= maxFragments) { break; } } for (int i = 0; i < resultSetFlowFiles.size(); i++) { // Set count on all FlowFiles if (maxRowsPerFlowFile > 0) { resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); } } } catch (final SQLException e) { throw e; } failure = executeConfigStatements(con, postQueries); if (failure != null) { hqlStatement = failure.getLeft(); if (resultSetFlowFiles != null) { resultSetFlowFiles.forEach(ff -> session.remove(ff)); } flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } session.transfer(resultSetFlowFiles, REL_SUCCESS); if (fileToProcess != null) { session.remove(fileToProcess); } } catch (final ProcessException | SQLException e) { logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e }); if (flowfile == null) { // This can happen if any exceptions occur while setting up the connection, statement, etc. logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { hqlStatement, e }); context.yield(); } else { if (context.hasIncomingConnection()) { logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { hqlStatement, flowfile, e }); flowfile = session.penalize(flowfile); } else { logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { hqlStatement, e }); context.yield(); } session.transfer(flowfile, REL_FAILURE); } } }
From source file:org.apache.nifi.processors.cassandra.QueryCassandra.java
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { FlowFile fileToProcess = null;/*from ww w . ja v a2 s . c om*/ if (context.hasIncomingConnection()) { fileToProcess = session.get(); // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. // However, if we have no FlowFile and we have connections coming from other Processors, then // we know that we should run only if we have a FlowFile. if (fileToProcess == null && context.hasNonLoopConnection()) { return; } } final ComponentLog logger = getLogger(); final String selectQuery = context.getProperty(CQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess) .getValue(); final long queryTimeout = context.getProperty(QUERY_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS); final String outputFormat = context.getProperty(OUTPUT_FORMAT).getValue(); final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); final StopWatch stopWatch = new StopWatch(true); if (fileToProcess == null) { fileToProcess = session.create(); } try { // The documentation for the driver recommends the session remain open the entire time the processor is running // and states that it is thread-safe. This is why connectionSession is not in a try-with-resources. final Session connectionSession = cassandraSession.get(); final ResultSetFuture queryFuture = connectionSession.executeAsync(selectQuery); final AtomicLong nrOfRows = new AtomicLong(0L); fileToProcess = session.write(fileToProcess, new OutputStreamCallback() { @Override public void process(final OutputStream out) throws IOException { try { logger.debug("Executing CQL query {}", new Object[] { selectQuery }); final ResultSet resultSet; if (queryTimeout > 0) { resultSet = queryFuture.getUninterruptibly(queryTimeout, TimeUnit.MILLISECONDS); if (AVRO_FORMAT.equals(outputFormat)) { nrOfRows.set( convertToAvroStream(resultSet, out, queryTimeout, TimeUnit.MILLISECONDS)); } else if (JSON_FORMAT.equals(outputFormat)) { nrOfRows.set(convertToJsonStream(resultSet, out, charset, queryTimeout, TimeUnit.MILLISECONDS)); } } else { resultSet = queryFuture.getUninterruptibly(); if (AVRO_FORMAT.equals(outputFormat)) { nrOfRows.set(convertToAvroStream(resultSet, out, 0, null)); } else if (JSON_FORMAT.equals(outputFormat)) { nrOfRows.set(convertToJsonStream(resultSet, out, charset, 0, null)); } } } catch (final TimeoutException | InterruptedException | ExecutionException e) { throw new ProcessException(e); } } }); // set attribute how many rows were selected fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() }); session.getProvenanceReporter().modifyContent(fileToProcess, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); session.transfer(fileToProcess, REL_SUCCESS); } catch (final NoHostAvailableException nhae) { getLogger().error( "No host in the Cassandra cluster can be contacted successfully to execute this query", nhae); // Log up to 10 error messages. Otherwise if a 1000-node cluster was specified but there was no connectivity, // a thousand error messages would be logged. However we would like information from Cassandra itself, so // cap the error limit at 10, format the messages, and don't include the stack trace (it is displayed by the // logger message above). getLogger().error(nhae.getCustomMessage(10, true, false)); fileToProcess = session.penalize(fileToProcess); session.transfer(fileToProcess, REL_RETRY); } catch (final QueryExecutionException qee) { logger.error("Cannot execute the query with the requested consistency level successfully", qee); fileToProcess = session.penalize(fileToProcess); session.transfer(fileToProcess, REL_RETRY); } catch (final QueryValidationException qve) { if (context.hasIncomingConnection()) { logger.error( "The CQL query {} is invalid due to syntax error, authorization issue, or another " + "validation problem; routing {} to failure", new Object[] { selectQuery, fileToProcess }, qve); fileToProcess = session.penalize(fileToProcess); session.transfer(fileToProcess, REL_FAILURE); } else { // This can happen if any exceptions occur while setting up the connection, statement, etc. logger.error("The CQL query {} is invalid due to syntax error, authorization issue, or another " + "validation problem", new Object[] { selectQuery }, qve); session.remove(fileToProcess); context.yield(); } } catch (final ProcessException e) { if (context.hasIncomingConnection()) { logger.error("Unable to execute CQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, fileToProcess, e }); fileToProcess = session.penalize(fileToProcess); session.transfer(fileToProcess, REL_FAILURE); } else { logger.error("Unable to execute CQL select query {} due to {}", new Object[] { selectQuery, e }); session.remove(fileToProcess); context.yield(); } } }
From source file:org.apache.nifi.processors.hive.SelectHive3QL.java
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); FlowFile flowfile = null;/*w w w. j a v a2 s . c o m*/ // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. // However, if we have no FlowFile and we have connections coming from other Processors, then // we know that we should run only if we have a FlowFile. if (context.hasIncomingConnection()) { if (fileToProcess == null && context.hasNonLoopConnection()) { return; } } final ComponentLog logger = getLogger(); final Hive3DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE) .asControllerService(Hive3DBCPService.class); final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); List<String> preQueries = getQueries( context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); List<String> postQueries = getQueries( context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); // Source the SQL String hqlStatement; if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess) .getValue(); } else { // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. final StringBuilder queryContents = new StringBuilder(); session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); hqlStatement = queryContents.toString(); } final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess) .asInteger(); final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE) .evaluateAttributeExpressions(fileToProcess).asInteger(); final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0; final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); final StopWatch stopWatch = new StopWatch(true); final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER) .evaluateAttributeExpressions(fileToProcess).getValue(); final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER) .evaluateAttributeExpressions(fileToProcess).getValue(); final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String fragmentIdentifier = UUID.randomUUID().toString(); try (final Connection con = dbcpService .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) { Pair<String, SQLException> failure = executeConfigStatements(con, preQueries); if (failure != null) { // In case of failure, assigning config query to "hqlStatement" to follow current error handling hqlStatement = failure.getLeft(); flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } st.setQueryTimeout( context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asInteger()); if (fetchSize != null && fetchSize > 0) { try { st.setFetchSize(fetchSize); } catch (SQLException se) { // Not all drivers support this, just log the error (at debug level) and move on logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se); } } final List<FlowFile> resultSetFlowFiles = new ArrayList<>(); try { logger.debug("Executing query {}", new Object[] { hqlStatement }); if (flowbased) { // Hive JDBC Doesn't Support this yet: // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); // int paramCount = pmd.getParameterCount(); // Alternate way to determine number of params in SQL. int paramCount = StringUtils.countMatches(hqlStatement, "?"); if (paramCount > 0) { setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); } } final ResultSet resultSet; try { resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); } catch (SQLException se) { // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw se; } int fragmentIndex = 0; String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; while (true) { final AtomicLong nrOfRows = new AtomicLong(0L); flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); if (baseFilename == null) { baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); } try { flowfile = session.write(flowfile, out -> { try { if (AVRO.equals(outputFormat)) { nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); } else if (CSV.equals(outputFormat)) { CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); } else { nrOfRows.set(0L); throw new ProcessException("Unsupported output format: " + outputFormat); } } catch (final SQLException | RuntimeException e) { throw new ProcessException("Error during database query or conversion of records.", e); } }); } catch (ProcessException e) { // Add flowfile to results before rethrowing so it will be removed from session in outer catch resultSetFlowFiles.add(flowfile); throw e; } if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { final Map<String, String> attributes = new HashMap<>(); // Set attribute for how many rows were selected attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); try { // Set input/output table names by parsing the query attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); } catch (Exception e) { // If failed to parse the query, just log a warning message, but continue. getLogger().warn("Failed to parse query: {} due to {}", new Object[] { hqlStatement, e }, e); } // Set MIME type on output document and add extension to filename if (AVRO.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro"); } else if (CSV.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv"); } if (maxRowsPerFlowFile > 0) { attributes.put("fragment.identifier", fragmentIdentifier); attributes.put("fragment.index", String.valueOf(fragmentIndex)); } flowfile = session.putAllAttributes(flowfile, attributes); logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() }); if (context.hasIncomingConnection()) { // If the flow file came from an incoming connection, issue a Fetch provenance event session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } else { // If we created a flow file from rows received from Hive, issue a Receive provenance event session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } resultSetFlowFiles.add(flowfile); } else { // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on session.remove(flowfile); if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) { flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1); } break; } fragmentIndex++; if (maxFragments > 0 && fragmentIndex >= maxFragments) { break; } } for (int i = 0; i < resultSetFlowFiles.size(); i++) { // Set count on all FlowFiles if (maxRowsPerFlowFile > 0) { resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); } } } catch (final SQLException e) { throw e; } failure = executeConfigStatements(con, postQueries); if (failure != null) { hqlStatement = failure.getLeft(); if (resultSetFlowFiles != null) { resultSetFlowFiles.forEach(ff -> session.remove(ff)); } flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } session.transfer(resultSetFlowFiles, REL_SUCCESS); if (fileToProcess != null) { session.remove(fileToProcess); } } catch (final ProcessException | SQLException e) { logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e }); if (flowfile == null) { // This can happen if any exceptions occur while setting up the connection, statement, etc. logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { hqlStatement, e }); context.yield(); } else { if (context.hasIncomingConnection()) { logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { hqlStatement, flowfile, e }); flowfile = session.penalize(flowfile); } else { logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { hqlStatement, e }); context.yield(); } session.transfer(flowfile, REL_FAILURE); } } }