List of usage examples for org.apache.commons.lang StringUtils countMatches
public static int countMatches(String str, String sub)
Counts how many times the substring sub appears in the larger String str. A null or empty ("") String input returns 0.
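A minimal standalone sketch of the basic behavior (the class name and printed values here are illustrative, not taken from the examples below):

import org.apache.commons.lang.StringUtils;

public class CountMatchesExample {
    public static void main(String[] args) {
        System.out.println(StringUtils.countMatches("abba abba", "ab")); // 2: two non-overlapping occurrences
        System.out.println(StringUtils.countMatches(null, "a"));         // 0: null input returns 0
        System.out.println(StringUtils.countMatches("", "a"));           // 0: empty input returns 0
        System.out.println(StringUtils.countMatches("abba", null));      // 0: null substring returns 0
        // A common idiom in the examples below: derive a component count from a delimiter count
        System.out.println(1 + StringUtils.countMatches("a;b;c", ";"));  // 3
    }
}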
From source file:org.apache.james.mailbox.model.MailboxAnnotationKey.java
public int countComponents() {
    // Each component of the annotation key is introduced by a '/', so the slash count equals the component count
    return StringUtils.countMatches(key, SLASH_CHARACTER);
}
From source file:org.apache.kylin.metadata.filter.function.LikeMatchers.java
public static LikeMatcher createMatcher(String patternStr) {
    if (patternStr == null) {
        throw new IllegalArgumentException("pattern is null");
    }

    if (patternStr.contains(UNDERSCORE_SIGN)) {
        return new DefaultLikeMatcher(patternStr);
    }

    int count = StringUtils.countMatches(patternStr, PERCENT_SIGN);
    if (count == 1) {
        return new OnePercentSignLikeMatcher(patternStr);
    } else if (count == 2 && patternStr.startsWith(PERCENT_SIGN) && patternStr.endsWith(PERCENT_SIGN)) {
        return new TwoPercentSignLikeMatcher(patternStr);
    } else if (count == 3 && patternStr.startsWith(PERCENT_SIGN) && patternStr.endsWith(PERCENT_SIGN)
            && !patternStr.contains(PERCENT_SIGN + PERCENT_SIGN)) {
        return new ThreePercentSignLikeMatcher(patternStr);
    } else {
        return new DefaultLikeMatcher(patternStr);
    }
}
From source file:org.apache.myfaces.trinidadinternal.renderkit.core.xhtml.TableFormLayoutRenderer.java
/**
 * Get how many columns have been defined.
 */
public int _getColumnCount(FacesBean bean) {
    String columns = this._getColumns(bean);
    int columnCount;
    if (columns != null) {
        // Columns are separated by ';', so the count is one more than the number of separators
        columnCount = 1 + StringUtils.countMatches(columns, ";");
    } else {
        columnCount = 1;
    }
    return columnCount;
}
From source file:org.apache.myfaces.trinidadinternal.renderkit.core.xhtml.TableFormLayoutRenderer.java
/**
 * Get how many rows have been defined.
 */
public int _getRowCount(FacesBean bean) {
    String rows = this._getRows(bean);
    int rowCount;
    if (rows != null) {
        // Rows are separated by ';', so the count is one more than the number of separators
        rowCount = 1 + StringUtils.countMatches(rows, ";");
    } else {
        rowCount = 1;
    }
    return rowCount;
}
From source file:org.apache.nifi.processors.hive.SelectHive3QL.java
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); FlowFile flowfile = null;/*from w ww. j a v a 2 s. c o m*/ // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. // However, if we have no FlowFile and we have connections coming from other Processors, then // we know that we should run only if we have a FlowFile. if (context.hasIncomingConnection()) { if (fileToProcess == null && context.hasNonLoopConnection()) { return; } } final ComponentLog logger = getLogger(); final Hive3DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE) .asControllerService(Hive3DBCPService.class); final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); List<String> preQueries = getQueries( context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); List<String> postQueries = getQueries( context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); // Source the SQL String hqlStatement; if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess) .getValue(); } else { // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. final StringBuilder queryContents = new StringBuilder(); session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); hqlStatement = queryContents.toString(); } final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess) .asInteger(); final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE) .evaluateAttributeExpressions(fileToProcess).asInteger(); final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0; final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); final StopWatch stopWatch = new StopWatch(true); final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER) .evaluateAttributeExpressions(fileToProcess).getValue(); final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER) .evaluateAttributeExpressions(fileToProcess).getValue(); final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String fragmentIdentifier = UUID.randomUUID().toString(); try (final Connection con = dbcpService .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) { Pair<String, SQLException> failure = executeConfigStatements(con, preQueries); if (failure != null) { // In case of failure, assigning config query to "hqlStatement" to follow current error handling hqlStatement = failure.getLeft(); flowfile = (fileToProcess == null) ? 
session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } st.setQueryTimeout( context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asInteger()); if (fetchSize != null && fetchSize > 0) { try { st.setFetchSize(fetchSize); } catch (SQLException se) { // Not all drivers support this, just log the error (at debug level) and move on logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se); } } final List<FlowFile> resultSetFlowFiles = new ArrayList<>(); try { logger.debug("Executing query {}", new Object[] { hqlStatement }); if (flowbased) { // Hive JDBC Doesn't Support this yet: // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); // int paramCount = pmd.getParameterCount(); // Alternate way to determine number of params in SQL. int paramCount = StringUtils.countMatches(hqlStatement, "?"); if (paramCount > 0) { setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); } } final ResultSet resultSet; try { resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); } catch (SQLException se) { // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw se; } int fragmentIndex = 0; String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; while (true) { final AtomicLong nrOfRows = new AtomicLong(0L); flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); if (baseFilename == null) { baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); } try { flowfile = session.write(flowfile, out -> { try { if (AVRO.equals(outputFormat)) { nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); } else if (CSV.equals(outputFormat)) { CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); } else { nrOfRows.set(0L); throw new ProcessException("Unsupported output format: " + outputFormat); } } catch (final SQLException | RuntimeException e) { throw new ProcessException("Error during database query or conversion of records.", e); } }); } catch (ProcessException e) { // Add flowfile to results before rethrowing so it will be removed from session in outer catch resultSetFlowFiles.add(flowfile); throw e; } if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { final Map<String, String> attributes = new HashMap<>(); // Set attribute for how many rows were selected attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); try { // Set input/output table names by parsing the query attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); } catch (Exception e) { // If failed to parse the query, just log a warning message, but continue. getLogger().warn("Failed to parse query: {} due to {}", new Object[] { hqlStatement, e }, e); } // Set MIME type on output document and add extension to filename if (AVRO.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." 
+ fragmentIndex + ".avro"); } else if (CSV.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv"); } if (maxRowsPerFlowFile > 0) { attributes.put("fragment.identifier", fragmentIdentifier); attributes.put("fragment.index", String.valueOf(fragmentIndex)); } flowfile = session.putAllAttributes(flowfile, attributes); logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() }); if (context.hasIncomingConnection()) { // If the flow file came from an incoming connection, issue a Fetch provenance event session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } else { // If we created a flow file from rows received from Hive, issue a Receive provenance event session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } resultSetFlowFiles.add(flowfile); } else { // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on session.remove(flowfile); if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) { flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1); } break; } fragmentIndex++; if (maxFragments > 0 && fragmentIndex >= maxFragments) { break; } } for (int i = 0; i < resultSetFlowFiles.size(); i++) { // Set count on all FlowFiles if (maxRowsPerFlowFile > 0) { resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); } } } catch (final SQLException e) { throw e; } failure = executeConfigStatements(con, postQueries); if (failure != null) { hqlStatement = failure.getLeft(); if (resultSetFlowFiles != null) { resultSetFlowFiles.forEach(ff -> session.remove(ff)); } flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } session.transfer(resultSetFlowFiles, REL_SUCCESS); if (fileToProcess != null) { session.remove(fileToProcess); } } catch (final ProcessException | SQLException e) { logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e }); if (flowfile == null) { // This can happen if any exceptions occur while setting up the connection, statement, etc. logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { hqlStatement, e }); context.yield(); } else { if (context.hasIncomingConnection()) { logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { hqlStatement, flowfile, e }); flowfile = session.penalize(flowfile); } else { logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { hqlStatement, e }); context.yield(); } session.transfer(flowfile, REL_FAILURE); } } }
From source file:org.apache.nifi.processors.hive.SelectHiveQL.java
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { final FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); FlowFile flowfile = null;/*ww w. j av a 2 s. c o m*/ // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. // However, if we have no FlowFile and we have connections coming from other Processors, then // we know that we should run only if we have a FlowFile. if (context.hasIncomingConnection()) { if (fileToProcess == null && context.hasNonLoopConnection()) { return; } } final ComponentLog logger = getLogger(); final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE) .asControllerService(HiveDBCPService.class); final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); // Source the SQL final String selectQuery; if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess) .getValue(); } else { // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. final StringBuilder queryContents = new StringBuilder(); session.read(fileToProcess, new InputStreamCallback() { @Override public void process(InputStream in) throws IOException { queryContents.append(IOUtils.toString(in)); } }); selectQuery = queryContents.toString(); } final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); final StopWatch stopWatch = new StopWatch(true); final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER) .evaluateAttributeExpressions(fileToProcess).getValue(); final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER) .evaluateAttributeExpressions(fileToProcess).getValue(); final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); try (final Connection con = dbcpService.getConnection(); final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())) { final AtomicLong nrOfRows = new AtomicLong(0L); if (fileToProcess == null) { flowfile = session.create(); } else { flowfile = fileToProcess; } flowfile = session.write(flowfile, new OutputStreamCallback() { @Override public void process(final OutputStream out) throws IOException { try { logger.debug("Executing query {}", new Object[] { selectQuery }); if (flowbased) { // Hive JDBC Doesn't Support this yet: // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); // int paramCount = pmd.getParameterCount(); // Alternate way to determine number of params in SQL. int paramCount = StringUtils.countMatches(selectQuery, "?"); if (paramCount > 0) { setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); } } final ResultSet resultSet = (flowbased ? 
((PreparedStatement) st).executeQuery() : st.executeQuery(selectQuery)); if (AVRO.equals(outputFormat)) { nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out)); } else if (CSV.equals(outputFormat)) { CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape); nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); } else { nrOfRows.set(0L); throw new ProcessException("Unsupported output format: " + outputFormat); } } catch (final SQLException e) { throw new ProcessException(e); } } }); // Set attribute for how many rows were selected flowfile = session.putAttribute(flowfile, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); // Set MIME type on output document and add extension to filename if (AVRO.equals(outputFormat)) { flowfile = session.putAttribute(flowfile, CoreAttributes.MIME_TYPE.key(), AVRO_MIME_TYPE); flowfile = session.putAttribute(flowfile, CoreAttributes.FILENAME.key(), flowfile.getAttribute(CoreAttributes.FILENAME.key()) + ".avro"); } else if (CSV.equals(outputFormat)) { flowfile = session.putAttribute(flowfile, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); flowfile = session.putAttribute(flowfile, CoreAttributes.FILENAME.key(), flowfile.getAttribute(CoreAttributes.FILENAME.key()) + ".csv"); } logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() }); if (context.hasIncomingConnection()) { // If the flow file came from an incoming connection, issue a Modify Content provenance event session.getProvenanceReporter().modifyContent(flowfile, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } else { // If we created a flow file from rows received from Hive, issue a Receive provenance event session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } session.transfer(flowfile, REL_SUCCESS); } catch (final ProcessException | SQLException e) { logger.error("Issue processing SQL {} due to {}.", new Object[] { selectQuery, e }); if (flowfile == null) { // This can happen if any exceptions occur while setting up the connection, statement, etc. logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { selectQuery, e }); context.yield(); } else { if (context.hasIncomingConnection()) { logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, flowfile, e }); flowfile = session.penalize(flowfile); } else { logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { selectQuery, e }); context.yield(); } session.transfer(flowfile, REL_FAILURE); } } finally { } }
From source file:org.apache.nifi.processors.hive.SelectHive_1_1QL.java
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); FlowFile flowfile = null;/* w ww . j a v a 2 s.c o m*/ // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. // However, if we have no FlowFile and we have connections coming from other Processors, then // we know that we should run only if we have a FlowFile. if (context.hasIncomingConnection()) { if (fileToProcess == null && context.hasNonLoopConnection()) { return; } } final ComponentLog logger = getLogger(); final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE) .asControllerService(Hive_1_1DBCPService.class); final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); List<String> preQueries = getQueries( context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); List<String> postQueries = getQueries( context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); // Source the SQL String hqlStatement; if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess) .getValue(); } else { // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. final StringBuilder queryContents = new StringBuilder(); session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); hqlStatement = queryContents.toString(); } final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess) .asInteger(); final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE) .evaluateAttributeExpressions(fileToProcess).asInteger(); final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0; final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); final StopWatch stopWatch = new StopWatch(true); final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER) .evaluateAttributeExpressions(fileToProcess).getValue(); final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER) .evaluateAttributeExpressions(fileToProcess).getValue(); final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); final String fragmentIdentifier = UUID.randomUUID().toString(); try (final Connection con = dbcpService .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) { Pair<String, SQLException> failure = executeConfigStatements(con, preQueries); if (failure != null) { // In case of failure, assigning config query to "hqlStatement" to follow current error handling hqlStatement = failure.getLeft(); flowfile = (fileToProcess == null) ? 
session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } if (fetchSize != null && fetchSize > 0) { try { st.setFetchSize(fetchSize); } catch (SQLException se) { // Not all drivers support this, just log the error (at debug level) and move on logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se); } } final List<FlowFile> resultSetFlowFiles = new ArrayList<>(); try { logger.debug("Executing query {}", new Object[] { hqlStatement }); if (flowbased) { // Hive JDBC Doesn't Support this yet: // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); // int paramCount = pmd.getParameterCount(); // Alternate way to determine number of params in SQL. int paramCount = StringUtils.countMatches(hqlStatement, "?"); if (paramCount > 0) { setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); } } final ResultSet resultSet; try { resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); } catch (SQLException se) { // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw se; } int fragmentIndex = 0; String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; while (true) { final AtomicLong nrOfRows = new AtomicLong(0L); flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); if (baseFilename == null) { baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); } try { flowfile = session.write(flowfile, out -> { try { if (AVRO.equals(outputFormat)) { nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); } else if (CSV.equals(outputFormat)) { CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); } else { nrOfRows.set(0L); throw new ProcessException("Unsupported output format: " + outputFormat); } } catch (final SQLException | RuntimeException e) { throw new ProcessException("Error during database query or conversion of records.", e); } }); } catch (ProcessException e) { // Add flowfile to results before rethrowing so it will be removed from session in outer catch resultSetFlowFiles.add(flowfile); throw e; } if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { final Map<String, String> attributes = new HashMap<>(); // Set attribute for how many rows were selected attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); try { // Set input/output table names by parsing the query attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); } catch (Exception e) { // If failed to parse the query, just log a warning message, but continue. getLogger().warn("Failed to parse query: {} due to {}", new Object[] { hqlStatement, e }, e); } // Set MIME type on output document and add extension to filename if (AVRO.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro"); } else if (CSV.equals(outputFormat)) { attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." 
+ fragmentIndex + ".csv"); } if (maxRowsPerFlowFile > 0) { attributes.put("fragment.identifier", fragmentIdentifier); attributes.put("fragment.index", String.valueOf(fragmentIndex)); } flowfile = session.putAllAttributes(flowfile, attributes); logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() }); if (context.hasIncomingConnection()) { // If the flow file came from an incoming connection, issue a Fetch provenance event session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } else { // If we created a flow file from rows received from Hive, issue a Receive provenance event session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); } resultSetFlowFiles.add(flowfile); } else { // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on session.remove(flowfile); if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) { flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1); } break; } fragmentIndex++; if (maxFragments > 0 && fragmentIndex >= maxFragments) { break; } } for (int i = 0; i < resultSetFlowFiles.size(); i++) { // Set count on all FlowFiles if (maxRowsPerFlowFile > 0) { resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); } } } catch (final SQLException e) { throw e; } failure = executeConfigStatements(con, postQueries); if (failure != null) { hqlStatement = failure.getLeft(); if (resultSetFlowFiles != null) { resultSetFlowFiles.forEach(ff -> session.remove(ff)); } flowfile = (fileToProcess == null) ? session.create() : fileToProcess; fileToProcess = null; throw failure.getRight(); } session.transfer(resultSetFlowFiles, REL_SUCCESS); if (fileToProcess != null) { session.remove(fileToProcess); } } catch (final ProcessException | SQLException e) { logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e }); if (flowfile == null) { // This can happen if any exceptions occur while setting up the connection, statement, etc. logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { hqlStatement, e }); context.yield(); } else { if (context.hasIncomingConnection()) { logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { hqlStatement, flowfile, e }); flowfile = session.penalize(flowfile); } else { logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { hqlStatement, e }); context.yield(); } session.transfer(flowfile, REL_FAILURE); } } }
From source file:org.apache.pig.tez.TestTezAutoParallelism.java
@Test
public void testIncreaseIntermediateParallelism1() throws IOException {
    // User specified parallelism is overridden for the intermediate step
    String outputDir = "/tmp/testIncreaseIntermediateParallelism";
    String script = "A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);"
            + "B = load '" + INPUT_FILE2 + "' as (name:chararray, gender:chararray);"
            + "C = join A by name, B by name using 'skewed' parallel 1;"
            + "D = group C by A::name;"
            + "E = foreach D generate group, COUNT(C.A::name);"
            + "STORE E into '" + outputDir + "/finalout';";
    String log = testIncreaseIntermediateParallelism(script, outputDir, true);
    // Parallelism of C should be increased
    assertTrue(log.contains("Increased requested parallelism of scope-59 to 4"));
    assertEquals(1, StringUtils.countMatches(log, "Increased requested parallelism"));
    assertTrue(log.contains("Total estimated parallelism is 40"));
}
From source file:org.apache.pig.tez.TestTezAutoParallelism.java
@Test
public void testIncreaseIntermediateParallelism2() throws IOException {
    // User specified parallelism should not be overridden for the intermediate step if there is a STORE
    String outputDir = "/tmp/testIncreaseIntermediateParallelism";
    String script = "A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);"
            + "B = load '" + INPUT_FILE2 + "' as (name:chararray, gender:chararray);"
            + "C = join A by name, B by name using 'skewed' parallel 2;"
            + "STORE C into '/tmp/testIncreaseIntermediateParallelism';"
            + "D = group C by A::name parallel 2;"
            + "E = foreach D generate group, COUNT(C.A::name);"
            + "STORE E into '" + outputDir + "/finalout';";
    String log = testIncreaseIntermediateParallelism(script, outputDir, true);
    // Parallelism of C will not be increased as the Split has a STORE
    assertEquals(0, StringUtils.countMatches(log, "Increased requested parallelism"));
}
From source file:org.apache.pig.tez.TestTezAutoParallelism.java
@Test
public void testIncreaseIntermediateParallelism3() throws IOException {
    // Multiple levels with default parallelism: group by followed by group by
    try {
        String outputDir = "/tmp/testIncreaseIntermediateParallelism";
        String script = "set default_parallel 1\n"
                + "A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);"
                + "B = load '" + INPUT_FILE2 + "' as (name:chararray, gender:chararray);"
                + "C = join A by name, B by name;"
                + "STORE C into '/tmp/testIncreaseIntermediateParallelism';"
                + "C1 = group C by A::name;"
                + "C2 = FOREACH C1 generate group, FLATTEN(C);"
                + "D = group C2 by group;"
                + "E = foreach D generate group, COUNT(C2.A::name);"
                + "F = order E by $0;"
                + "STORE F into '" + outputDir + "/finalout';";
        String log = testIncreaseIntermediateParallelism(script, outputDir, false);
        // Parallelism of C1 should be increased. C2 will not be increased due to order by
        assertEquals(1, StringUtils.countMatches(log, "Increased requested parallelism"));
        assertTrue(log.contains("Increased requested parallelism of scope-65 to 10"));
        assertTrue(log.contains("Total estimated parallelism is 19"));
    } finally {
        pigServer.setDefaultParallel(-1);
    }
}