Example usage for org.apache.commons.lang StringUtils countMatches

Introduction

This page shows example usage of StringUtils.countMatches from org.apache.commons.lang.

Prototype

public static int countMatches(String str, String sub) 

Document

Counts how many times the substring appears in the larger String.
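
A minimal sketch of this behavior, assuming commons-lang 2.x is on the classpath (the sample strings below are illustrative only):

import org.apache.commons.lang.StringUtils;

public class CountMatchesDemo {
    public static void main(String[] args) {
        // Counts non-overlapping occurrences of the substring, scanning left to right
        System.out.println(StringUtils.countMatches("a.b.c.d", "."));  // 3

        // A null or empty String (or substring) yields 0 instead of throwing
        System.out.println(StringUtils.countMatches(null, "."));       // 0
        System.out.println(StringUtils.countMatches("abba", ""));      // 0
    }
}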

Usage

From source file:org.apache.james.mailbox.model.MailboxAnnotationKey.java

public int countComponents() {
    return StringUtils.countMatches(key, SLASH_CHARACTER);
}

From source file:org.apache.kylin.metadata.filter.function.LikeMatchers.java

public static LikeMatcher createMatcher(String patternStr) {
    if (patternStr == null) {
        throw new IllegalArgumentException("pattern is null");
    }

    if (patternStr.contains(UNDERSCORE_SIGN)) {
        return new DefaultLikeMatcher(patternStr);
    }

    int count = StringUtils.countMatches(patternStr, PERCENT_SIGN);
    if (count == 1) {
        return new OnePercentSignLikeMatcher(patternStr);
    } else if (count == 2 && patternStr.startsWith(PERCENT_SIGN) && patternStr.endsWith(PERCENT_SIGN)) {
        return new TwoPercentSignLikeMatcher(patternStr);
    } else if (count == 3 && patternStr.startsWith(PERCENT_SIGN) && patternStr.endsWith(PERCENT_SIGN)
            && !patternStr.contains(PERCENT_SIGN + PERCENT_SIGN)) {
        return new ThreePercentSignLikeMatcher(patternStr);
    } else {
        return new DefaultLikeMatcher(patternStr);
    }
}

From source file:org.apache.myfaces.trinidadinternal.renderkit.core.xhtml.TableFormLayoutRenderer.java

/**
 * Returns how many columns have been defined.
 */
public int _getColumnCount(FacesBean bean) {
    String columns = this._getColumns(bean);
    int columnCount;
    if (columns != null) {
        columnCount = 1 + StringUtils.countMatches(columns, ";");
    } else {
        columnCount = 1;
    }
    return columnCount;
}

From source file:org.apache.myfaces.trinidadinternal.renderkit.core.xhtml.TableFormLayoutRenderer.java

/**
 * Returns how many rows have been defined.
 */
public int _getRowCount(FacesBean bean) {

    String rows = this._getRows(bean);
    int rowCount;
    if (rows != null) {
        rowCount = 1 + StringUtils.countMatches(rows, ";");
    } else {
        rowCount = 1;
    }
    return rowCount;
}
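
Both Trinidad helpers above use the same idiom: the number of fields in a delimited string is one more than the number of separators. A minimal standalone sketch of that idiom (the sample value is made up):

import org.apache.commons.lang.StringUtils;

public class DelimitedCountSketch {
    public static void main(String[] args) {
        String columns = "30%;40%;30%";
        // Three fields are separated by two semicolons, so the count is 1 + 2
        int columnCount = (columns != null) ? 1 + StringUtils.countMatches(columns, ";") : 1;
        System.out.println(columnCount);  // 3
    }
}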

From source file:org.apache.nifi.processors.hive.SelectHive3QL.java

private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;

    // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
    // However, if we have no FlowFile and we have connections coming from other Processors, then
    // we know that we should run only if we have a FlowFile.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }

    final ComponentLog logger = getLogger();
    final Hive3DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE)
            .asControllerService(Hive3DBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());

    List<String> preQueries = getQueries(
            context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
    List<String> postQueries = getQueries(
            context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());

    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());

    // Source the SQL
    String hqlStatement;

    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess)
                .getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
        hqlStatement = queryContents.toString();
    }

    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess)
            .asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE)
            .evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
            ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
            : 0;
    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String fragmentIdentifier = UUID.randomUUID().toString();

    try (final Connection con = dbcpService
            .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes());
            final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) {
        Pair<String, SQLException> failure = executeConfigStatements(con, preQueries);
        if (failure != null) {
            // In case of failure, assign the config query to "hqlStatement" to follow the current error handling
            hqlStatement = failure.getLeft();
            flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
            fileToProcess = null;
            throw failure.getRight();
        }
        st.setQueryTimeout(
                context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asInteger());

        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}",
                        new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }

        final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
        try {
            logger.debug("Executing query {}", new Object[] { hqlStatement });
            if (flowbased) {
                // Hive JDBC Doesn't Support this yet:
                // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
                // int paramCount = pmd.getParameterCount();

                // Alternate way to determine number of params in SQL.
                int paramCount = StringUtils.countMatches(hqlStatement, "?");

                if (paramCount > 0) {
                    setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                }
            }

            final ResultSet resultSet;

            try {
                resultSet = (flowbased ? ((PreparedStatement) st).executeQuery()
                        : st.executeQuery(hqlStatement));
            } catch (SQLException se) {
                // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
                flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
                fileToProcess = null;
                throw se;
            }

            int fragmentIndex = 0;
            String baseFilename = (fileToProcess != null)
                    ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key())
                    : null;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                if (baseFilename == null) {
                    baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
                }
                try {
                    flowfile = session.write(flowfile, out -> {
                        try {
                            if (AVRO.equals(outputFormat)) {
                                nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out,
                                        maxRowsPerFlowFile, convertNamesForAvro));
                            } else if (CSV.equals(outputFormat)) {
                                CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter,
                                        quote, escape, maxRowsPerFlowFile);
                                nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                            } else {
                                nrOfRows.set(0L);
                                throw new ProcessException("Unsupported output format: " + outputFormat);
                            }
                        } catch (final SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records.",
                                    e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(flowfile);
                    throw e;
                }

                if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
                    final Map<String, String> attributes = new HashMap<>();
                    // Set attribute for how many rows were selected
                    attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));

                    try {
                        // Set input/output table names by parsing the query
                        attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement)));
                    } catch (Exception e) {
                        // If the query fails to parse, just log a warning message and continue.
                        getLogger().warn("Failed to parse query: {} due to {}",
                                new Object[] { hqlStatement, e }, e);
                    }

                    // Set MIME type on output document and add extension to filename
                    if (AVRO.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
                        attributes.put(CoreAttributes.FILENAME.key(),
                                baseFilename + "." + fragmentIndex + ".avro");
                    } else if (CSV.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
                        attributes.put(CoreAttributes.FILENAME.key(),
                                baseFilename + "." + fragmentIndex + ".csv");
                    }

                    if (maxRowsPerFlowFile > 0) {
                        attributes.put("fragment.identifier", fragmentIdentifier);
                        attributes.put("fragment.index", String.valueOf(fragmentIndex));
                    }

                    flowfile = session.putAllAttributes(flowfile, attributes);

                    logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'",
                            new Object[] { flowfile, nrOfRows.get() });

                    if (context.hasIncomingConnection()) {
                        // If the flow file came from an incoming connection, issue a Fetch provenance event
                        session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(),
                                "Retrieved " + nrOfRows.get() + " rows",
                                stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    } else {
                        // If we created a flow file from rows received from Hive, issue a Receive provenance event
                        session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(),
                                stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    }
                    resultSetFlowFiles.add(flowfile);
                } else {
                    // If there were no rows returned (and the first flow file has been sent), we're done processing, so remove the flowfile and carry on
                    session.remove(flowfile);
                    if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) {
                        flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1);
                    }
                    break;
                }

                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }

            for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                // Set count on all FlowFiles
                if (maxRowsPerFlowFile > 0) {
                    resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count",
                            Integer.toString(fragmentIndex)));
                }
            }

        } catch (final SQLException e) {
            throw e;
        }

        failure = executeConfigStatements(con, postQueries);
        if (failure != null) {
            hqlStatement = failure.getLeft();
            if (resultSetFlowFiles != null) {
                resultSetFlowFiles.forEach(ff -> session.remove(ff));
            }
            flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
            fileToProcess = null;
            throw failure.getRight();
        }

        session.transfer(resultSetFlowFiles, REL_SUCCESS);
        if (fileToProcess != null) {
            session.remove(fileToProcess);
        }

    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
                    new Object[] { hqlStatement, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
                        new Object[] { hqlStatement, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
                        new Object[] { hqlStatement, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    }
}

From source file:org.apache.nifi.processors.hive.SelectHiveQL.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;

    // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
    // However, if we have no FlowFile and we have connections coming from other Processors, then
    // we know that we should run only if we have a FlowFile.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }

    final ComponentLog logger = getLogger();
    final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE)
            .asControllerService(HiveDBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());

    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());

    // Source the SQL
    final String selectQuery;

    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess)
                .getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                queryContents.append(IOUtils.toString(in));
            }
        });
        selectQuery = queryContents.toString();
    }

    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();

    try (final Connection con = dbcpService.getConnection();
            final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())) {

        final AtomicLong nrOfRows = new AtomicLong(0L);
        if (fileToProcess == null) {
            flowfile = session.create();
        } else {
            flowfile = fileToProcess;
        }

        flowfile = session.write(flowfile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    logger.debug("Executing query {}", new Object[] { selectQuery });
                    if (flowbased) {
                        // Hive JDBC Doesn't Support this yet:
                        // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
                        // int paramCount = pmd.getParameterCount();

                        // Alternate way to determine number of params in SQL.
                        int paramCount = StringUtils.countMatches(selectQuery, "?");

                        if (paramCount > 0) {
                            setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                        }
                    }

                    final ResultSet resultSet = (flowbased ? ((PreparedStatement) st).executeQuery()
                            : st.executeQuery(selectQuery));

                    if (AVRO.equals(outputFormat)) {
                        nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out));
                    } else if (CSV.equals(outputFormat)) {
                        CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote,
                                escape);
                        nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                    } else {
                        nrOfRows.set(0L);
                        throw new ProcessException("Unsupported output format: " + outputFormat);
                    }
                } catch (final SQLException e) {
                    throw new ProcessException(e);
                }
            }
        });

        // Set attribute for how many rows were selected
        flowfile = session.putAttribute(flowfile, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));

        // Set MIME type on output document and add extension to filename
        if (AVRO.equals(outputFormat)) {
            flowfile = session.putAttribute(flowfile, CoreAttributes.MIME_TYPE.key(), AVRO_MIME_TYPE);
            flowfile = session.putAttribute(flowfile, CoreAttributes.FILENAME.key(),
                    flowfile.getAttribute(CoreAttributes.FILENAME.key()) + ".avro");
        } else if (CSV.equals(outputFormat)) {
            flowfile = session.putAttribute(flowfile, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
            flowfile = session.putAttribute(flowfile, CoreAttributes.FILENAME.key(),
                    flowfile.getAttribute(CoreAttributes.FILENAME.key()) + ".csv");
        }

        logger.info("{} contains {} Avro records; transferring to 'success'",
                new Object[] { flowfile, nrOfRows.get() });

        if (context.hasIncomingConnection()) {
            // If the flow file came from an incoming connection, issue a Modify Content provenance event

            session.getProvenanceReporter().modifyContent(flowfile, "Retrieved " + nrOfRows.get() + " rows",
                    stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        } else {
            // If we created a flow file from rows received from Hive, issue a Receive provenance event
            session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(),
                    stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        }
        session.transfer(flowfile, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { selectQuery, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
                    new Object[] { selectQuery, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
                        new Object[] { selectQuery, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
                        new Object[] { selectQuery, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    } finally {

    }
}

From source file:org.apache.nifi.processors.hive.SelectHive_1_1QL.java

private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;

    // If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
    // However, if we have no FlowFile and we have connections coming from other Processors, then
    // we know that we should run only if we have a FlowFile.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }

    final ComponentLog logger = getLogger();
    final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE)
            .asControllerService(Hive_1_1DBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());

    List<String> preQueries = getQueries(
            context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
    List<String> postQueries = getQueries(
            context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());

    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());

    // Source the SQL
    String hqlStatement;

    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess)
                .getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
        hqlStatement = queryContents.toString();
    }

    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess)
            .asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE)
            .evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
            ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
            : 0;
    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER)
            .evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String fragmentIdentifier = UUID.randomUUID().toString();

    try (final Connection con = dbcpService
            .getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes());
            final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())) {
        Pair<String, SQLException> failure = executeConfigStatements(con, preQueries);
        if (failure != null) {
            // In case of failure, assign the config query to "hqlStatement" to follow the current error handling
            hqlStatement = failure.getLeft();
            flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
            fileToProcess = null;
            throw failure.getRight();
        }
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}",
                        new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }

        final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
        try {
            logger.debug("Executing query {}", new Object[] { hqlStatement });
            if (flowbased) {
                // Hive JDBC Doesn't Support this yet:
                // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
                // int paramCount = pmd.getParameterCount();

                // Alternate way to determine number of params in SQL.
                int paramCount = StringUtils.countMatches(hqlStatement, "?");

                if (paramCount > 0) {
                    setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                }
            }

            final ResultSet resultSet;

            try {
                resultSet = (flowbased ? ((PreparedStatement) st).executeQuery()
                        : st.executeQuery(hqlStatement));
            } catch (SQLException se) {
                // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
                flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
                fileToProcess = null;
                throw se;
            }

            int fragmentIndex = 0;
            String baseFilename = (fileToProcess != null)
                    ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key())
                    : null;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                if (baseFilename == null) {
                    baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
                }
                try {
                    flowfile = session.write(flowfile, out -> {
                        try {
                            if (AVRO.equals(outputFormat)) {
                                nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out,
                                        maxRowsPerFlowFile, convertNamesForAvro));
                            } else if (CSV.equals(outputFormat)) {
                                CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter,
                                        quote, escape, maxRowsPerFlowFile);
                                nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                            } else {
                                nrOfRows.set(0L);
                                throw new ProcessException("Unsupported output format: " + outputFormat);
                            }
                        } catch (final SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records.",
                                    e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(flowfile);
                    throw e;
                }

                if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
                    final Map<String, String> attributes = new HashMap<>();
                    // Set attribute for how many rows were selected
                    attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));

                    try {
                        // Set input/output table names by parsing the query
                        attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement)));
                    } catch (Exception e) {
                        // If the query fails to parse, just log a warning message and continue.
                        getLogger().warn("Failed to parse query: {} due to {}",
                                new Object[] { hqlStatement, e }, e);
                    }

                    // Set MIME type on output document and add extension to filename
                    if (AVRO.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
                        attributes.put(CoreAttributes.FILENAME.key(),
                                baseFilename + "." + fragmentIndex + ".avro");
                    } else if (CSV.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
                        attributes.put(CoreAttributes.FILENAME.key(),
                                baseFilename + "." + fragmentIndex + ".csv");
                    }

                    if (maxRowsPerFlowFile > 0) {
                        attributes.put("fragment.identifier", fragmentIdentifier);
                        attributes.put("fragment.index", String.valueOf(fragmentIndex));
                    }

                    flowfile = session.putAllAttributes(flowfile, attributes);

                    logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'",
                            new Object[] { flowfile, nrOfRows.get() });

                    if (context.hasIncomingConnection()) {
                        // If the flow file came from an incoming connection, issue a Fetch provenance event
                        session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(),
                                "Retrieved " + nrOfRows.get() + " rows",
                                stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    } else {
                        // If we created a flow file from rows received from Hive, issue a Receive provenance event
                        session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(),
                                stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    }
                    resultSetFlowFiles.add(flowfile);
                } else {
                    // If there were no rows returned (and the first flow file has been sent), we're done processing, so remove the flowfile and carry on
                    session.remove(flowfile);
                    if (resultSetFlowFiles != null && resultSetFlowFiles.size() > 0) {
                        flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1);
                    }
                    break;
                }

                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }

            for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                // Set count on all FlowFiles
                if (maxRowsPerFlowFile > 0) {
                    resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count",
                            Integer.toString(fragmentIndex)));
                }
            }

        } catch (final SQLException e) {
            throw e;
        }

        failure = executeConfigStatements(con, postQueries);
        if (failure != null) {
            hqlStatement = failure.getLeft();
            if (resultSetFlowFiles != null) {
                resultSetFlowFiles.forEach(ff -> session.remove(ff));
            }
            flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
            fileToProcess = null;
            throw failure.getRight();
        }

        session.transfer(resultSetFlowFiles, REL_SUCCESS);
        if (fileToProcess != null) {
            session.remove(fileToProcess);
        }
    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { hqlStatement, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
                    new Object[] { hqlStatement, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
                        new Object[] { hqlStatement, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
                        new Object[] { hqlStatement, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    }
}

From source file:org.apache.pig.tez.TestTezAutoParallelism.java

@Test
public void testIncreaseIntermediateParallelism1() throws IOException {
    // User-specified parallelism is overridden for the intermediate step
    String outputDir = "/tmp/testIncreaseIntermediateParallelism";
    String script = "A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);" + "B = load '" + INPUT_FILE2
            + "' as (name:chararray, gender:chararray);"
            + "C = join A by name, B by name using 'skewed' parallel 1;" + "D = group C by A::name;"
            + "E = foreach D generate group, COUNT(C.A::name);" + "STORE E into '" + outputDir + "/finalout';";
    String log = testIncreaseIntermediateParallelism(script, outputDir, true);
    // Parallelism of C should be increased
    assertTrue(log.contains("Increased requested parallelism of scope-59 to 4"));
    assertEquals(1, StringUtils.countMatches(log, "Increased requested parallelism"));
    assertTrue(log.contains("Total estimated parallelism is 40"));
}

From source file:org.apache.pig.tez.TestTezAutoParallelism.java

@Test
public void testIncreaseIntermediateParallelism2() throws IOException {
    // User-specified parallelism should not be overridden for the intermediate step if there is a STORE
    String outputDir = "/tmp/testIncreaseIntermediateParallelism";
    String script = "A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);" + "B = load '" + INPUT_FILE2
            + "' as (name:chararray, gender:chararray);"
            + "C = join A by name, B by name using 'skewed' parallel 2;"
            + "STORE C into '/tmp/testIncreaseIntermediateParallelism';" + "D = group C by A::name parallel 2;"
            + "E = foreach D generate group, COUNT(C.A::name);" + "STORE E into '" + outputDir + "/finalout';";
    String log = testIncreaseIntermediateParallelism(script, outputDir, true);
    // Parallelism of C will not be increased as the Split has a STORE
    assertEquals(0, StringUtils.countMatches(log, "Increased requested parallelism"));
}

From source file:org.apache.pig.tez.TestTezAutoParallelism.java

@Test
public void testIncreaseIntermediateParallelism3() throws IOException {
    // Multiple levels with default parallelism. Group by followed by Group by
    try {
        String outputDir = "/tmp/testIncreaseIntermediateParallelism";
        String script = "set default_parallel 1\n" + "A = load '" + INPUT_FILE1
                + "' as (name:chararray, age:int);" + "B = load '" + INPUT_FILE2
                + "' as (name:chararray, gender:chararray);" + "C = join A by name, B by name;"
                + "STORE C into '/tmp/testIncreaseIntermediateParallelism';" + "C1 = group C by A::name;"
                + "C2 = FOREACH C1 generate group, FLATTEN(C);" + "D = group C2 by group;"
                + "E = foreach D generate group, COUNT(C2.A::name);" + "F = order E by $0;" + "STORE F into '"
                + outputDir + "/finalout';";
        String log = testIncreaseIntermediateParallelism(script, outputDir, false);
        // Parallelism of C1 should be increased. C2 will not be increased due to order by
        assertEquals(1, StringUtils.countMatches(log, "Increased requested parallelism"));
        assertTrue(log.contains("Increased requested parallelism of scope-65 to 10"));
        assertTrue(log.contains("Total estimated parallelism is 19"));
    } finally {
        pigServer.setDefaultParallel(-1);
    }
}
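
The Pig tests above use countMatches to assert how many times a message appears in captured log output. A minimal, self-contained sketch of that idiom (the log text here is made up for illustration):

import org.apache.commons.lang.StringUtils;

public class LogCountSketch {
    public static void main(String[] args) {
        String log = "Increased requested parallelism of scope-59 to 4\n"
                + "Total estimated parallelism is 40";
        // The message appears exactly once in this captured log
        int hits = StringUtils.countMatches(log, "Increased requested parallelism");
        System.out.println(hits == 1);  // true
    }
}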