Example usage for java.util.regex Pattern split

List of usage examples for java.util.regex Pattern split

Introduction

On this page you can find example usage for java.util.regex Pattern split.

Prototype

public String[] split(CharSequence input, int limit) 

Source Link

Document

Splits the given input sequence around matches of this pattern.
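
The limit argument controls how many times the pattern is applied and whether trailing empty strings are kept: a positive limit caps the number of resulting substrings, zero discards trailing empty strings, and a negative limit keeps them. A minimal, self-contained illustration (not taken from any of the projects below; input values are hypothetical):

import java.util.Arrays;
import java.util.regex.Pattern;

public class SplitLimitDemo {
    public static void main(String[] args) {
        Pattern comma = Pattern.compile(",");
        // limit 2: split at most once, the remainder stays in the last element -> [a, b,,c,,]
        System.out.println(Arrays.toString(comma.split("a,b,,c,,", 2)));
        // limit 0: trailing empty strings are discarded -> [a, b, , c]
        System.out.println(Arrays.toString(comma.split("a,b,,c,,", 0)));
        // negative limit: trailing empty strings are kept -> [a, b, , c, , ]
        System.out.println(Arrays.toString(comma.split("a,b,,c,,", -1)));
    }
}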

Usage

From source file:hydrograph.engine.spark.datasource.delimited.HydrographDelimitedParser.java

/**
 * Method createSplit will split the given {@code value} with the given {@code splitPattern}.
 *
 * @param value        of type String
 * @param splitPattern of type Pattern
 * @param numValues    of type int
 * @return String[]
 */
public String[] createSplit(String value, Pattern splitPattern, int numValues) {
    return splitPattern.split(value, numValues);
}
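
A hypothetical call (not part of the original source, and assuming a parser instance is at hand) shows that numValues is passed straight through as the split limit, so at most numValues fields are produced and any extra delimiters remain inside the last field:

// parser: a HydrographDelimitedParser instance (hypothetical usage)
Pattern tab = Pattern.compile("\\t");
String[] fields = parser.createSplit("id\tname\tcity\tnotes with\ttabs", tab, 4);
// fields -> ["id", "name", "city", "notes with\ttabs"]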

From source file:com.vmware.identity.saml.impl.TokenValidatorImpl.java

/**
 * @param value
 *           required
 * @param separator
 *           required
 * @return the two non-empty parts of {@code value}
 */
private String[] splitInTwo(String value, char separator) {

    Pattern splitter = Pattern.compile(Pattern.quote(String.valueOf(separator)));
    String split[] = splitter.split(value, 3);
    if (split.length != 2 || split[0].isEmpty() || split[1].isEmpty()) {
        throw new IllegalStateException(
                String.format("Invalid principal value: `%s' (incorrect number of fields)", value));
    }

    return split;
}
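
The limit of 3 rather than 2 is what makes the length check effective: with a limit of 2 an extra separator would simply be folded into the second field, whereas a limit of 3 produces a third element that the split.length != 2 test can reject. A small illustration with hypothetical values:

Pattern at = Pattern.compile(Pattern.quote("@"));
at.split("user@example.com", 3);        // ["user", "example.com"]          -> accepted
at.split("user@example.com@extra", 2);  // ["user", "example.com@extra"]    -> would slip through
at.split("user@example.com@extra", 3);  // ["user", "example.com", "extra"] -> rejected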

From source file:com.github.lindenb.jvarkit.tools.misc.AddLinearIndexToBed.java

protected int doWork(InputStream is, PrintStream out) throws IOException {
    final Pattern tab = Pattern.compile("[\t]");
    BufferedReader in = new BufferedReader(new InputStreamReader(is));
    String line = null;
    while ((line = in.readLine()) != null) {
        if (line.isEmpty() || line.startsWith("#") || line.startsWith("track") || line.startsWith("browser"))
            continue;
        String tokens[] = tab.split(line, 3);
        if (tokens.length < 2) {
            LOG.warn("Bad chrom/pos line:" + line);
            continue;
        }
        SAMSequenceRecord ssr = this.dictionary.getSequence(tokens[0]);
        if (ssr == null) {
            for (SAMSequenceRecord sr2 : this.dictionary.getSequences()) {
                LOG.info("available " + sr2.getSequenceName());
            }
            throw new IOException("undefined chromosome:" + tokens[0]);
        }
        int pos0 = Integer.parseInt(tokens[1]);
        if (pos0 < 0 || pos0 >= ssr.getSequenceLength()) {
            LOG.warn("position is out of range for : " + line + " length(" + tokens[0] + ")="
                    + ssr.getSequenceLength());
        }
        out.print(this.tid2offset[ssr.getSequenceIndex()] + pos0);
        out.print('\t');
        out.print(line);
        out.println();
        if (out.checkError())
            break;
    }
    return 0;
}
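
The limit of 3 matters here because only the chromosome and start position are needed: the pattern is applied at most twice, and any remaining columns of the BED line are left unsplit in tokens[2]. A hypothetical line illustrates this:

Pattern tab = Pattern.compile("[\t]");
// hypothetical BED line with six columns
String[] tokens = tab.split("chr1\t100\t200\tname\t0\t+", 3);
// tokens -> ["chr1", "100", "200\tname\t0\t+"]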

From source file:it.uniroma2.sag.kelp.wordspace.Wordspace.java

/**
 * Loads the word-vector pairs stored in the file whose path is <code>filename</code>.
 * The file can be a plain text file or a .gz archive.
 * <p>
 * The expected format is:
 * <pre>
 * number_of_vectors space_dimensionality
 * word_i [TAB] 1.0 [TAB] 0 [TAB] vector values comma separated
 * </pre>
 * Example:
 * <pre>
 * 3 5
 * dog::n [TAB] 1.0 [TAB] 0 [TAB] 2.1,4.1,1.4,2.3,0.9
 * cat::n [TAB] 1.0 [TAB] 0 [TAB] 3.2,4.3,1.2,2.2,0.8
 * mouse::n [TAB] 1.0 [TAB] 0 [TAB] 2.4,4.4,2.4,1.3,0.92
 * </pre>
 *
 * @param filename the path of the file containing the word-vector pairs
 * @throws IOException
 */
private void populate(String filename) throws IOException {
    BufferedReader br = null;
    GZIPInputStream gzis = null;
    if (filename.endsWith(".gz")) {
        gzis = new GZIPInputStream(new FileInputStream(filename));
        InputStreamReader reader = new InputStreamReader(gzis, "UTF8");
        br = new BufferedReader(reader);
    } else {
        br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF8"));
    }
    String line;
    ArrayList<String> split;
    String label;
    String[] vSplit;

    Pattern iPattern = Pattern.compile(",");
    float[] v = null;

    while ((line = br.readLine()) != null) {
        if (!line.contains("\t"))
            continue;
        float norm2 = 0;
        split = mySplit(line);
        label = split.get(0);
        vSplit = iPattern.split(split.get(3), 0);
        if (v == null)
            v = new float[vSplit.length];
        for (int i = 0; i < v.length; i++) {
            v[i] = Float.parseFloat(vSplit[i]);
            norm2 += v[i] * v[i];
        }
        float norm = (float) Math.sqrt(norm2);
        for (int i = 0; i < v.length; i++) {
            v[i] /= norm;
        }

        DenseMatrix64F featureVector = new DenseMatrix64F(1, v.length);
        for (int i = 0; i < v.length; i++) {
            featureVector.set(0, i, (double) v[i]);
        }

        DenseVector denseFeatureVector = new DenseVector(featureVector);

        addWordVector(label, denseFeatureVector);

    }
    if (filename.endsWith(".gz")) {
        gzis.close();
    }
    br.close();

}

From source file:com.jecelyin.editor.v2.core.text.TextUtils.java

/**
 * Splits a string on a pattern. String.split() returns [''] when the string to be
 * split is empty. This returns []. This does not remove any empty strings from the result.
 * @param text the string to split
 * @param pattern the regular expression to match
 * @return an array of strings. The array will be empty if text is empty
 *
 * @throws NullPointerException if expression or text is null
 */
public static String[] split(String text, Pattern pattern) {
    if (text.length() == 0) {
        return EMPTY_STRING_ARRAY;
    } else {
        return pattern.split(text, -1);
    }
}
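
The two departures from String.split() described in the comment are easy to see in a small, hypothetical comparison:

Pattern comma = Pattern.compile(",");
TextUtils.split("", comma);        // -> []                      ("".split(",") would return [""])
TextUtils.split("a,,b,,", comma);  // -> ["a", "", "b", "", ""]  (trailing empties kept via limit -1)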

From source file:com.github.lindenb.jvarkit.tools.redon.CopyNumber01.java

/** get a GC% */
private void prefillGCPercentWithCapture(File bedFile) throws Exception {
    long start = System.currentTimeMillis();
    Pattern tab = Pattern.compile("[\t]");
    BufferedReader in = IOUtils.openFileForBufferedReading(bedFile);
    String line;
    Set<String> not_found = new HashSet<>(this.samDictionary.size());
    while ((line = in.readLine()) != null) {
        if (line.trim().isEmpty() || line.startsWith("#"))
            continue;
        String tokens[] = tab.split(line, 4);
        String chrom = tokens[0];
        if (this.samDictionary.getSequence(chrom) == null) {
            chrom = this.resolveChromName.get(chrom);
            if (chrom == null) {
                if (!not_found.contains(tokens[0])) {
                    info("Cannot resolve chromosome " + tokens[0] + " in " + line);
                    not_found.add(tokens[0]);
                }
                continue;
            }
        }

        if (ignoreChromosomeName(chrom)) {
            info("Ignoring " + chrom);
            continue;
        }
        String chrom_for_seq = tokens[0];//TODO

        GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, chrom_for_seq);
        int bedStart = Integer.parseInt(tokens[1]);
        int bedEnd = Integer.parseInt(tokens[2]);
        prefillGCPercent(genomic, bedStart, bedEnd);

        long now = System.currentTimeMillis();
        if (now - start > 10 * 1000) {
            info("BED:" + line + " " + this.interval2row.size());
            start = now;
        }
    }
}

From source file:edu.brown.utils.ArgumentsParser.java

/**
 * @param args
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public void process(String[] args, String... required) throws Exception {
    final boolean debug = LOG.isDebugEnabled();

    if (debug)
        LOG.debug("Processing " + args.length + " parameters...");
    final Pattern p = Pattern.compile("=");
    for (int i = 0, cnt = args.length; i < cnt; i++) {
        final String arg = args[i];
        final String[] parts = p.split(arg, 2);
        if (parts[0].startsWith("-"))
            parts[0] = parts[0].substring(1);

        if (parts.length == 1) {
            if (parts[0].startsWith("${") == false)
                this.opt_params.add(parts[0]);
            continue;
        } else if (parts[0].equalsIgnoreCase("tag")) {
            continue;
        } else if (parts[1].startsWith("${") || parts[0].startsWith("#")) {
            continue;
        }
        if (debug)
            LOG.debug(String.format("%-35s = %s", parts[0], parts[1]));

        // DesignerHints Override
        if (parts[0].startsWith(PARAM_DESIGNER_HINTS_PREFIX)) {
            String param = parts[0].replace(PARAM_DESIGNER_HINTS_PREFIX, "").toLowerCase();
            try {
                Field f = DesignerHints.class.getField(param);
                this.hints_params.put(f.getName(), parts[1]);
                if (debug)
                    LOG.debug(String.format("DesignerHints.%s = %s", param, parts[1]));
            } catch (NoSuchFieldException ex) {
                throw new Exception("Unknown DesignerHints parameter: " + param, ex);
            }

        }
        // HStoreConf Parameter
        else if (HStoreConf.isConfParameter(parts[0])) {
            this.conf_params.put(parts[0].toLowerCase(), parts[1]);
        }
        // ArgumentsParser Parameter
        else if (PARAMS.contains(parts[0].toLowerCase())) {
            this.params.put(parts[0].toLowerCase(), parts[1]);
        }
        // Invalid!
        else {
            String suggestions = "";
            i = 0;
            String end = CollectionUtil.last(parts[0].split("\\."));
            for (String param : PARAMS) {
                String param_end = CollectionUtil.last(param.split("\\."));
                if (param.startsWith(parts[0]) || (end != null && param.endsWith(end))
                        || (end != null && param_end != null && param_end.startsWith(end))) {
                    if (suggestions.isEmpty())
                        suggestions = ". Possible Matches:";
                    suggestions += String.format("\n [%02d] %s", ++i, param);
                }
            } // FOR
            throw new Exception("Unknown parameter '" + parts[0] + "'" + suggestions);
        }
    } // FOR

    // -------------------------------------------------------
    // CATALOGS
    // -------------------------------------------------------

    // Text File
    if (this.params.containsKey(PARAM_CATALOG)) {
        String path = this.params.get(PARAM_CATALOG);
        if (debug)
            LOG.debug("Loading catalog from file '" + path + "'");
        Catalog catalog = CatalogUtil.loadCatalog(path);
        if (catalog == null)
            throw new Exception("Failed to load catalog object from file '" + path + "'");
        this.updateCatalog(catalog, new File(path));
    }
    // Jar File
    else if (this.params.containsKey(PARAM_CATALOG_JAR)) {
        String path = this.params.get(PARAM_CATALOG_JAR);
        this.params.put(PARAM_CATALOG, path);
        File jar_file = new File(path);
        Catalog catalog = CatalogUtil.loadCatalogFromJar(path);
        if (catalog == null)
            throw new Exception("Failed to load catalog object from jar file '" + path + "'");
        if (debug)
            LOG.debug("Loaded catalog from jar file '" + path + "'");
        this.updateCatalog(catalog, jar_file);

        if (!this.params.containsKey(PARAM_CATALOG_TYPE)) {
            String jar_name = jar_file.getName();
            int jar_idx = jar_name.lastIndexOf(".jar");
            if (jar_idx != -1) {
                ProjectType type = ProjectType.get(jar_name.substring(0, jar_idx));
                if (type != null) {
                    if (debug)
                        LOG.debug("Set catalog type '" + type + "' from catalog jar file name");
                    this.catalog_type = type;
                    this.params.put(PARAM_CATALOG_TYPE, this.catalog_type.toString());
                }
            }
        }

        // HACK: Extract the ParameterMappings embedded in jar and write them to a temp file
        // This is terrible, confusing, and a total mess...
        // I have no one to blame but myself...
        JarReader reader = new JarReader(jar_file.getAbsolutePath());
        for (String file : reader.getContentsFromJarfile()) {
            if (file.endsWith(".mappings")) {
                String contents = new String(JarReader.readFileFromJarAtURL(jar_file.getAbsolutePath(), file));
                File copy = FileUtil.writeStringToTempFile(contents, "mappings", true);
                this.params.put(PARAM_MAPPINGS, copy.toString());
                break;
            }
        } // FOR
    }
    // Schema File
    else if (this.params.containsKey(PARAM_CATALOG_SCHEMA)) {
        String path = this.params.get(PARAM_CATALOG_SCHEMA);
        Catalog catalog = CompilerUtil.compileCatalog(path);
        if (catalog == null)
            throw new Exception("Failed to load schema from '" + path + "'");
        if (debug)
            LOG.debug("Loaded catalog from schema file '" + path + "'");
        this.updateCatalog(catalog, new File(path));
    }

    // Catalog Type
    if (this.params.containsKey(PARAM_CATALOG_TYPE)) {
        String catalog_type = this.params.get(PARAM_CATALOG_TYPE);
        ProjectType type = ProjectType.get(catalog_type);
        if (type == null) {
            throw new Exception("Unknown catalog type '" + catalog_type + "'");
        }
        this.catalog_type = type;
    }

    // Update Cluster Configuration
    if (this.params.containsKey(ArgumentsParser.PARAM_CATALOG_HOSTS) && DISABLE_UPDATE_CATALOG == false) {
        ClusterConfiguration cc = new ClusterConfiguration(this.getParam(ArgumentsParser.PARAM_CATALOG_HOSTS));
        this.updateCatalog(FixCatalog.cloneCatalog(this.catalog, cc), null);
    }

    // Check the requirements after loading the catalog, because some of the
    // above parameters will set the catalog one
    if (required != null && required.length > 0)
        this.require(required);

    // -------------------------------------------------------
    // PHYSICAL DESIGN COMPONENTS
    // -------------------------------------------------------
    if (this.params.containsKey(PARAM_PARTITION_PLAN)) {
        assert (this.catalog_db != null);
        File path = new File(this.params.get(PARAM_PARTITION_PLAN));
        boolean ignoreMissing = this.getBooleanParam(ArgumentsParser.PARAM_PARTITION_PLAN_IGNORE_MISSING,
                false);
        if (path.exists() || (path.exists() == false && ignoreMissing == false)) {
            if (debug)
                LOG.debug("Loading in partition plan from '" + path + "'");
            this.pplan = new PartitionPlan();
            this.pplan.load(path, this.catalog_db);

            // Apply!
            if (this.params.containsKey(PARAM_PARTITION_PLAN_APPLY)
                    && this.getBooleanParam(PARAM_PARTITION_PLAN_APPLY)) {
                boolean secondaryIndexes = this.getBooleanParam(PARAM_PARTITION_PLAN_NO_SECONDARY,
                        false) == false;
                LOG.info(String.format("Applying PartitionPlan '%s' to catalog [enableSecondaryIndexes=%s]",
                        path.getName(), secondaryIndexes));
                this.pplan.apply(this.catalog_db, secondaryIndexes);
            }
        }
    }

    // -------------------------------------------------------
    // DESIGNER COMPONENTS
    // -------------------------------------------------------

    if (this.params.containsKey(PARAM_DESIGNER_THREADS)) {
        this.max_concurrent = Integer.valueOf(this.params.get(PARAM_DESIGNER_THREADS));
    }
    if (this.params.containsKey(PARAM_DESIGNER_INTERVALS)) {
        this.num_intervals = Integer.valueOf(this.params.get(PARAM_DESIGNER_INTERVALS));
    }
    if (this.params.containsKey(PARAM_DESIGNER_HINTS)) {
        File path = this.getFileParam(PARAM_DESIGNER_HINTS);
        if (debug)
            LOG.debug("Loading in designer hints from '" + path + "'.\nForced Values:\n"
                    + StringUtil.formatMaps(this.hints_params));
        this.designer_hints.load(path, catalog_db, this.hints_params);
    }
    if (this.params.containsKey(PARAM_DESIGNER_CHECKPOINT)) {
        this.designer_checkpoint = new File(this.params.get(PARAM_DESIGNER_CHECKPOINT));
    }

    String designer_attributes[] = { PARAM_DESIGNER_PARTITIONER, PARAM_DESIGNER_MAPPER, PARAM_DESIGNER_INDEXER,
            PARAM_DESIGNER_COSTMODEL };
    ClassLoader loader = ClassLoader.getSystemClassLoader();
    for (String key : designer_attributes) {
        if (this.params.containsKey(key)) {
            String target_name = this.params.get(key);
            Class<?> target_class = loader.loadClass(target_name);
            assert (target_class != null);
            if (debug)
                LOG.debug("Set " + key + " class to " + target_class.getName());

            if (key.equals(PARAM_DESIGNER_PARTITIONER)) {
                this.partitioner_class = (Class<? extends AbstractPartitioner>) target_class;
            } else if (key.equals(PARAM_DESIGNER_MAPPER)) {
                this.mapper_class = (Class<? extends AbstractMapper>) target_class;
            } else if (key.equals(PARAM_DESIGNER_INDEXER)) {
                this.indexer_class = (Class<? extends AbstractIndexSelector>) target_class;
            } else if (key.equals(PARAM_DESIGNER_COSTMODEL)) {
                this.costmodel_class = (Class<? extends AbstractCostModel>) target_class;

                // Special Case: TimeIntervalCostModel
                if (target_name.endsWith(TimeIntervalCostModel.class.getSimpleName())) {
                    this.costmodel = new TimeIntervalCostModel<SingleSitedCostModel>(this.catalogContext,
                            SingleSitedCostModel.class, this.num_intervals);
                } else {
                    this.costmodel = ClassUtil.newInstance(this.costmodel_class,
                            new Object[] { this.catalogContext }, new Class[] { Database.class });
                }
            } else {
                assert (false) : "Invalid key '" + key + "'";
            }
        }
    } // FOR

    // -------------------------------------------------------
    // TRANSACTION ESTIMATION COMPONENTS
    // -------------------------------------------------------
    if (this.params.containsKey(PARAM_MAPPINGS) && DISABLE_UPDATE_CATALOG == false) {
        assert (this.catalog_db != null);
        File path = new File(this.params.get(PARAM_MAPPINGS));
        if (path.exists()) {
            this.param_mappings.load(path, this.catalog_db);
        } else {
            LOG.warn("The ParameterMappings file '" + path + "' does not exist");
        }
    }
    if (this.params.containsKey(PARAM_MARKOV_THRESHOLDS_VALUE)) {
        assert (this.catalog_db != null);
        float defaultValue = this.getDoubleParam(PARAM_MARKOV_THRESHOLDS_VALUE).floatValue();
        this.thresholds = new EstimationThresholds(defaultValue);
        this.params.put(PARAM_MARKOV_THRESHOLDS, this.thresholds.toString());
        LOG.debug("CREATED THRESHOLDS: " + this.thresholds);

    } else if (this.params.containsKey(PARAM_MARKOV_THRESHOLDS)) {
        assert (this.catalog_db != null);
        this.thresholds = new EstimationThresholds();
        File path = new File(this.params.get(PARAM_MARKOV_THRESHOLDS));
        if (path.exists()) {
            this.thresholds.load(path, this.catalog_db);
        } else {
            LOG.warn("The estimation thresholds file '" + path + "' does not exist");
        }
        LOG.debug("LOADED THRESHOLDS: " + this.thresholds);
    }

    // -------------------------------------------------------
    // HASHER
    // -------------------------------------------------------
    if (this.catalog != null) {
        if (this.params.containsKey(PARAM_HASHER_CLASS)) {
            String hasherClassName = this.params.get(PARAM_HASHER_CLASS);
            this.hasher_class = (Class<? extends AbstractHasher>) loader.loadClass(hasherClassName);
        }
        Class<?> paramClasses[] = new Class[] { CatalogContext.class, int.class };
        Object paramValues[] = new Object[] { this.catalogContext, this.catalogContext.numberOfPartitions };

        Constructor<? extends AbstractHasher> constructor = this.hasher_class.getConstructor(paramClasses);
        this.hasher = constructor.newInstance(paramValues);

        if (!(this.hasher instanceof DefaultHasher))
            LOG.debug("Loaded hasher " + this.hasher.getClass());

        if (this.params.containsKey(PARAM_HASHER_PROFILE)) {
            this.hasher.load(this.getFileParam(PARAM_HASHER_PROFILE), null);
        }
    }

    // -------------------------------------------------------
    // SAMPLE WORKLOAD TRACE
    // -------------------------------------------------------
    this.loadWorkload();
}
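
The Pattern.split call relevant to this page sits near the top of the method: p.split(arg, 2) separates the key from the value at the first '=' only, so values that themselves contain '=' survive intact. A hypothetical argument shows the effect:

Pattern eq = Pattern.compile("=");
// hypothetical command-line argument
String[] parts = eq.split("-client.host=node1=backup", 2);
// parts -> ["-client.host", "node1=backup"]  (the leading '-' is stripped afterwards)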

From source file:com.rapidminer.tools.Tools.java

/**
 * Splits the given line according to the given separator pattern, regarding only those
 * separators that do not lie inside a quoted string. Note that quoting characters are not
 * treated as quotes if they are escaped by the escape character.
 */
public static String[] quotedSplit(String line, Pattern separatorPattern, char quotingChar, char escapeChar) {

    // determine split positions according to non-escaped quotes
    int[] quoteSplitIndices = new int[line.length()];
    char lastChar = '0';
    int lastSplitIndex = -1;
    for (int i = 0; i < line.length(); i++) {
        char currentChar = line.charAt(i);
        if (currentChar == quotingChar) {
            boolean escaped = false;
            if (i != 0 && lastChar == escapeChar) {
                escaped = true;
            }

            if (!escaped) {
                quoteSplitIndices[++lastSplitIndex] = i;
            }
        }
        lastChar = currentChar;
    }

    // add quote parts to a list and replace escape chars
    List<String> quotedSplits = new LinkedList<>();
    if (lastSplitIndex < 0) {
        line = line.replaceAll("\\\\\"", "\""); // remove escape characters
        quotedSplits.add(line);
    } else {
        int start = 0;
        for (int i = 0; i <= lastSplitIndex; i++) {
            int end = quoteSplitIndices[i];
            String part = "";
            if (end > start) {
                part = line.substring(start, end);
            }
            part = part.replaceAll("\\\\\"", "\""); // remove escape characters
            quotedSplits.add(part);
            start = end + 1;
        }
        if (start < line.length()) {
            String part = line.substring(start);
            part = part.replaceAll("\\\\\"", "\""); // remove escape characters
            quotedSplits.add(part);
        }
    }

    // now handle split and non-split parts
    // ALGORITHM:
    // *** at Split-Parts: remove empty starts and endings, use empty parts in the middle
    //     (as missing), use also non-empty parts (as non-missing)
    //     - Exception: the first and the last split parts. Here the start and the end must
    //       also be used even if they are empty (they are missing then)
    // *** at Non-Split-Parts: simply use the whole value. It is missing if it is empty.
    // IMPORTANT: a negative limit for the split method (here: -1) is very important in order
    //            to get empty trailing values
    List<String> result = new LinkedList<>();
    boolean isSplitPart = true;
    int index = 0;
    for (String part : quotedSplits) {
        if (index > 0 || part.trim().length() > 0) {
            // skip first split if part is empty (coming from leading quotes in the line)
            if (isSplitPart) {
                // ATTENTION: a negative limit is very important to get trailing empty strings
                String[] separatedParts = separatorPattern.split(part, -1);
                for (int s = 0; s < separatedParts.length; s++) {
                    String currentPart = separatedParts[s].trim();
                    if (currentPart.length() == 0) {
                        // part is empty -- missing if in the middle or at line start or end
                        if (s == 0 && index == 0) {
                            result.add(currentPart);
                        } else if (s == separatedParts.length - 1 && index == quotedSplits.size() - 1) {
                            result.add(currentPart);
                        } else if (s > 0 && s < separatedParts.length - 1) {
                            result.add(currentPart);
                        }
                    } else {
                        result.add(currentPart);
                    }
                }
            } else {
                result.add(part);
            }
        }
        isSplitPart = !isSplitPart;
        index++;
    }

    String[] resultArray = new String[result.size()];
    result.toArray(resultArray);
    return resultArray;
}
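
A short, hypothetical call (not part of the original class) shows what the method produces for a semicolon-separated line with a quoted field:

Pattern semicolon = Pattern.compile(";");
// hypothetical input line: a;"b;c";d
String[] parts = Tools.quotedSplit("a;\"b;c\";d", semicolon, '"', '\\');
// parts -> ["a", "b;c", "d"]   (the separator inside the quotes is preserved)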

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

/**
 * Get a distinct list of species and their counts using a facet search
 *
 * @param queryString
 * @param pageSize
 * @param sortField
 * @param sortDirection
 * @return a list of species and their counts
 * @throws SolrServerException
 */
protected List<TaxaCountDTO> getSpeciesCounts(String queryString, List<String> filterQueries,
        List<String> facetFields, Integer pageSize, Integer startIndex, String sortField, String sortDirection)
        throws SolrServerException {

    List<TaxaCountDTO> speciesCounts = new ArrayList<TaxaCountDTO>();
    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setQueryType("standard");
    solrQuery.setQuery(queryString);

    if (filterQueries != null && filterQueries.size() > 0) {
        //solrQuery.addFilterQuery("(" + StringUtils.join(filterQueries, " OR ") + ")");
        for (String fq : filterQueries) {
            solrQuery.addFilterQuery(fq);
        }
    }
    solrQuery.setRows(0);
    solrQuery.setFacet(true);
    solrQuery.setFacetSort(sortField);
    for (String facet : facetFields) {
        solrQuery.addFacetField(facet);
        logger.debug("adding facetField: " + facet);
    }
    //set the facet starting point based on the paging information
    solrQuery.setFacetMinCount(1);
    solrQuery.setFacetLimit(pageSize); // unlimited = -1 | pageSize
    solrQuery.add("facet.offset", Integer.toString(startIndex));
    logger.debug("getSpeciesCount query :" + solrQuery.getQuery());
    QueryResponse qr = runSolrQuery(solrQuery, null, 1, 0, "score", sortDirection);
    logger.info("SOLR query: " + solrQuery.getQuery() + "; total hits: " + qr.getResults().getNumFound());
    List<FacetField> facets = qr.getFacetFields();
    java.util.regex.Pattern p = java.util.regex.Pattern.compile("\\|");

    if (facets != null && facets.size() > 0) {
        logger.debug("Facets: " + facets.size() + "; facet #1: " + facets.get(0).getName());
        for (FacetField facet : facets) {
            List<FacetField.Count> facetEntries = facet.getValues();
            if ((facetEntries != null) && (facetEntries.size() > 0)) {
                //NO need to page through all facets to locate the current page...
                //for (int i = 0; i < facetEntries.size(); i++) {
                //int highestEntry = (pageSize < 0) ? facetEntries.size() : startIndex + pageSize;
                //int lastEntry = (highestEntry > facetEntries.size()) ? facetEntries.size() : highestEntry;
                //logger.debug("highestEntry = " + highestEntry + ", facetEntries.size = " + facetEntries.size() + ", lastEntry = " + lastEntry);
                //for (int i = startIndex; i < lastEntry; i++) {
                for (FacetField.Count fcount : facetEntries) {
                    //FacetField.Count fcount = facetEntries.get(i);
                    //speciesCounts.add(i, new TaxaCountDTO(fcount.getName(), fcount.getCount()));
                    TaxaCountDTO tcDTO = null;
                    if (fcount.getFacetField().getName().equals(NAMES_AND_LSID)) {
                        String[] values = p.split(fcount.getName(), 5);

                        if (values.length >= 5) {
                            if (!"||||".equals(fcount.getName())) {
                                tcDTO = new TaxaCountDTO(values[0], fcount.getCount());
                                tcDTO.setGuid(StringUtils.trimToNull(values[1]));
                                tcDTO.setCommonName(values[2]);
                                tcDTO.setKingdom(values[3]);
                                tcDTO.setFamily(values[4]);
                                if (StringUtils.isNotEmpty(tcDTO.getGuid()))
                                    tcDTO.setRank(searchUtils.getTaxonSearch(tcDTO.getGuid())[1].split(":")[0]);
                            }
                        } else {
                            logger.debug("The values length: " + values.length + " :" + fcount.getName());
                            tcDTO = new TaxaCountDTO(fcount.getName(), fcount.getCount());
                        }
                        //speciesCounts.add(i, tcDTO);
                        if (tcDTO != null)
                            speciesCounts.add(tcDTO);
                    } else if (fcount.getFacetField().getName().equals(COMMON_NAME_AND_LSID)) {
                        String[] values = p.split(fcount.getName(), 6);

                        if (values.length >= 5) {
                            if (!"|||||".equals(fcount.getName())) {
                                tcDTO = new TaxaCountDTO(values[1], fcount.getCount());
                                tcDTO.setGuid(StringUtils.trimToNull(values[2]));
                                tcDTO.setCommonName(values[0]);
                                //cater for the bug of extra vernacular name in the result
                                tcDTO.setKingdom(values[values.length - 2]);
                                tcDTO.setFamily(values[values.length - 1]);
                                if (StringUtils.isNotEmpty(tcDTO.getGuid()))
                                    tcDTO.setRank(searchUtils.getTaxonSearch(tcDTO.getGuid())[1].split(":")[0]);
                            }
                        } else {
                            logger.debug("The values length: " + values.length + " :" + fcount.getName());
                            tcDTO = new TaxaCountDTO(fcount.getName(), fcount.getCount());
                        }
                        //speciesCounts.add(i, tcDTO);
                        if (tcDTO != null) {
                            speciesCounts.add(tcDTO);
                        }
                    }
                }
            }
        }
    }

    return speciesCounts;
}
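
The p.split(fcount.getName(), 5) and p.split(fcount.getName(), 6) calls above rely on the limit to cap the number of fields, so any extra pipe characters beyond the expected columns stay inside the last element. A hypothetical NAMES_AND_LSID value makes this concrete:

java.util.regex.Pattern p = java.util.regex.Pattern.compile("\\|");
// hypothetical facet value: name|guid|common name|kingdom|family
String[] values = p.split("Acacia dealbata|urn:lsid:example:taxon:1|Silver Wattle|Plantae|Fabaceae", 5);
// values -> ["Acacia dealbata", "urn:lsid:example:taxon:1", "Silver Wattle", "Plantae", "Fabaceae"]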