Example usage for java.util.concurrent ForkJoinPool invokeAll

Introduction

This page collects usage examples for java.util.concurrent.ForkJoinPool.invokeAll.

Prototype

public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks) 
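
A minimal, self-contained sketch of this overload before the longer excerpts below (the pool size, task count, and result strings are illustrative, not taken from the sources):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;

public class InvokeAllSketch {
    public static void main(String[] args) throws Exception {
        ForkJoinPool pool = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
        List<Callable<String>> tasks = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
            final int n = i;
            tasks.add(() -> "result-" + n); // each Callable produces one result
        }
        // invokeAll blocks until every task completes, so each returned Future is already done
        List<Future<String>> futures = pool.invokeAll(tasks);
        for (Future<String> f : futures) {
            System.out.println(f.get()); // get() returns immediately here
        }
        pool.shutdown();
    }
}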

Usage

From source file:com.hygenics.parser.GetImages.java

private void getImages() {
    // controls the web process from a removed method
    log.info("Setting Up Pull");
    String[] proxyarr = (proxies == null) ? null : proxies.split(",");
    // cleanup
    if (cleanup) {
        cleanupDir(fpath);
    }

    // image grab
    CookieManager cm = new CookieManager();
    cm.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
    CookieHandler.setDefault(cm);
    int numimages = 0;
    InputStream is;
    byte[] bytes;
    int iter = 0;
    int found = 0;

    // set proxy if needed
    if (proxyuser != null) {
        proxy(proxyhost, proxyport, https, proxyuser, proxypass);
    }

    int i = 0;
    ArrayList<String> postImages = new ArrayList<String>();
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
    Set<Callable<String>> pulls = new HashSet<Callable<String>>();
    Set<Callable<ArrayList<String>>> sqls = new HashSet<Callable<ArrayList<String>>>();
    List<Future<String>> imageFutures;

    ArrayList<String> images;
    int chunksize = (int) Math.ceil((double) commitsize / numqueries);
    log.info("Chunksize: " + chunksize);
    if (baseurl != null || baseurlcolumn != null) {
        do {
            log.info("Offset: " + offset);
            log.info("Getting Images");
            images = new ArrayList<String>(commitsize);
            log.info("Getting Columns");
            for (int n = 0; n < numqueries; n++) {
                String tempsql = sql + " WHERE " + idString + " >= " + offset + " AND " + idString + " < "
                        + (offset + chunksize);

                if (conditions != null) {
                    tempsql += conditions;
                }

                sqls.add(new QueryDatabase(
                        ((extracondition != null) ? tempsql + " " + extracondition : tempsql)));

                offset += chunksize;
            }

            List<Future<ArrayList<String>>> futures = fjp.invokeAll(sqls);

            int w = 0;
            while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                w++;
            }

            for (Future<ArrayList<String>> f : futures) {
                try {
                    ArrayList<String> fjson;
                    fjson = f.get();
                    if (fjson.size() > 0) {
                        images.addAll(fjson);
                    }

                    if (f.isDone() == false) {
                        f.cancel(true);
                    }
                } catch (InterruptedException e) {
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    e.printStackTrace();
                }
            }
            log.info(Integer.toString(images.size()) + " image links found. Pulling.");

            ArrayList<String> tempproxies = new ArrayList<String>();

            if (proxyarr != null) {
                for (String proxy : proxyarr) {
                    tempproxies.add(proxy.trim());
                }
            }

            if (maxproxies > 0) {
                maxproxies -= 1; // 0 and 1 are equivalent conditions here; the count is not zero-based because of the >= comparison
            }

            // get images
            for (int num = 0; num < images.size(); num++) {
                String icols = images.get(num);
                int proxnum = (int) (Math.random() * (tempproxies.size() - 1));
                String proxy = (tempproxies.size() == 0) ? null : tempproxies.get(proxnum);

                // add grab
                pulls.add(new ImageGrabber(icols, proxy));

                if (proxy != null) {
                    tempproxies.remove(proxy);
                }

                // check for execution
                if (num + 1 == images.size() || pulls.size() >= commitsize || tempproxies.size() == 0) {
                    if (tempproxies.size() == 0 && proxies != null) {
                        tempproxies = new ArrayList<String>(proxyarr.length);

                        for (String p : proxyarr) {
                            tempproxies.add(p.trim());
                        }
                    }

                    imageFutures = fjp.invokeAll(pulls);
                    w = 0;

                    while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                        w++;
                    }

                    for (Future<String> f : imageFutures) {
                        String add;
                        try {
                            add = f.get();

                            if (add != null) {
                                postImages.add(add);
                            }
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } catch (ExecutionException e) {
                            e.printStackTrace();
                        }
                    }
                    imageFutures = null; // eligible for garbage collection
                    pulls = new HashSet<Callable<String>>(commitsize);
                }

                if (postImages.size() >= commitsize && addtoDB == true) {
                    if (addtoDB) {
                        log.info("Posting to Database");
                        log.info("Found " + postImages.size() + " images");
                        numimages += postImages.size();
                        int size = (int) Math.floor(postImages.size() / numqueries);
                        for (int n = 0; n < numqueries; n++) {
                            if (((n + 1) * size) < postImages.size() && (n + 1) < numqueries) {
                                fjp.execute(new ImagePost(postImages.subList(n * size, (n + 1) * size)));
                            } else {
                                fjp.execute(new ImagePost(postImages.subList(n * size, postImages.size())));
                            }
                        }

                        w = 0;
                        while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                            w++;
                        }
                    }
                    found += postImages.size();
                    postImages.clear();
                }

            }

            if (postImages.size() > 0 && addtoDB == true) {
                log.info("Posting to Database");
                numimages += postImages.size();
                int size = (int) Math.floor(postImages.size() / numqueries);
                for (int n = 0; n < numqueries; n++) {
                    if (((n + 1) * size) < postImages.size()) {
                        fjp.execute(new ImagePost(postImages.subList(n * size, (n + 1) * size)));
                    } else {
                        fjp.execute(new ImagePost(postImages.subList(n * size, postImages.size())));
                    }
                }

                w = 0;
                while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                    w++;
                }

                found += postImages.size();
                postImages.clear();
            }

            // handle iterations specs
            iter += 1;
            log.info("Iteration: " + iter);
            if ((iter < iterations && found < images.size()) || tillfound == true) {
                log.info("Not All Images Obtained Trying Iteration " + iter + " of " + iterations);
                offset -= commitsize;
            } else if ((iter < iterations && found >= images.size()) && tillfound == false) {
                log.info("Images Obtained in " + iter + " iterations. Continuing.");
                iter = 0;
            } else {
                // precautionary
                log.info("Images Obtained in " + iter + " iterations. Continuing");
                iter = 0;
            }

        } while (images.size() > 0 && iter < iterations);

        if (fjp.isShutdown() == false) {
            fjp.shutdownNow();
        }
    }

    log.info("Complete. Check for Errors \n " + numimages + " Images Found");
}
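
A note on the wait loops in this and the following excerpts: ExecutorService.invokeAll(Collection), which ForkJoinPool implements, only returns once every submitted task has completed, so the returned futures are already done and the spin loops on isQuiescent()/getActiveThreadCount() are not required for correctness. A minimal sketch of the simpler equivalent, with hypothetical task and method names:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;

class QueryBatchSketch {
    // Runs a batch of query tasks and gathers their rows.
    static List<String> runBatch(ForkJoinPool fjp, List<Callable<ArrayList<String>>> tasks) throws Exception {
        List<String> rows = new ArrayList<>();
        // invokeAll blocks until every task finishes; no spin-wait is needed afterwards.
        for (Future<ArrayList<String>> f : fjp.invokeAll(tasks)) {
            rows.addAll(f.get()); // completed futures return immediately
        }
        return rows;
    }
}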

From source file:com.hygenics.parser.BreakMultiple.java

/**
 * Run the class.
 */
public void run() {
    int j = 0;
    checkTable();
    rows = new ArrayList<String>();
    log.info("Starting Break");

    // the pool
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);

    // for returned results
    List<Future<ArrayList<String>>> futures = new ArrayList<Future<ArrayList<String>>>();

    // for parsing
    Set<Callable<ArrayList<String>>> collect = new HashSet<Callable<ArrayList<String>>>();

    // for querying
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>();

    // results
    ArrayList<String> jsons = new ArrayList<String>();

    String condition = null;
    int size = (int) Math.ceil((double) pullsize / qnum);
    // get initial data from user
    for (int i = 0; i < qnum; i++) {
        condition = " WHERE " + idcolumn + " > " + Integer.toString(offset + (Math.round(pullsize / qnum) * i))
                + " AND " + idcolumn + " <= "
                + Integer.toString(offset + (Math.round(pullsize / qnum) * (i + 1)));

        if (extracondition != null) {
            condition += " " + extracondition.trim();
        }

        qcollect.add(new GetFromDB((select + condition), template));
        log.info("SELECTING " + select + " " + condition);
    }

    log.info("Getting From DB @" + Calendar.getInstance().getTime().toString());
    futures = fjp.invokeAll(qcollect);

    int w = 0;
    while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
        w++;
    }

    log.info("Waited for " + w + "Cycles");

    for (Future<ArrayList<String>> f : futures) {
        try {
            rows.addAll(f.get());
            f.cancel(true);
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ExecutionException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    qcollect = new HashSet<Callable<ArrayList<String>>>();
    futures = null;

    log.info("Breaking");
    // process while there is still data to process
    while (rows.size() > 0) {
        log.info("Iteration Contains " + rows.size() + " Rows");
        // add to the commit size for future processing
        offset += pullsize;
        log.info("Submitting Tasks");
        // submit for breaking apart

        for (String r : rows) {

            if (fjp.isShutdown()) {
                fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
            }

            if (r != null) {

                if (mustcontain != null) {
                    if (r.contains(mustcontain)) {
                        if (cannotcontain != null) {
                            if (r.contains(cannotcontain) == false) {
                                Map<String, Json> rowmap = Json.read(r).asJsonMap();

                                // final getDAOTemplate template, final
                                // String row, final String token, final
                                // String replacementPattern, final
                                // Map<String,String> positions,final String
                                // date, final String table, final String
                                // offenderhash
                                if (rowmap.size() > 0) {
                                    collect.add(new Break(unescape, repeatkeys, template,
                                            rowmap.get(rowcolumn).asString(), token, replacementPattern,
                                            positions, (Calendar.getInstance().getTime().toString()),
                                            targettable, rowmap.get("offenderhash").asString(), maxpos,
                                            genhash));
                                }
                            }
                        } else {
                            Map<String, Json> rowmap = Json.read(r).asJsonMap();

                            // final getDAOTemplate template, final String
                            // row, final String token, final String
                            // replacementPattern, final Map<String,String>
                            // positions,final String date, final String
                            // table, final String offenderhash
                            if (rowmap.size() > 0) {
                                collect.add(new Break(unescape, repeatkeys, template,
                                        rowmap.get(rowcolumn).asString(), token, replacementPattern, positions,
                                        (Calendar.getInstance().getTime().toString()), targettable,
                                        rowmap.get("offenderhash").asString(), maxpos, genhash));
                            }
                        }
                    }
                } else {

                    if (cannotcontain != null) {
                        if (r.contains(cannotcontain) == false) {
                            Map<String, Json> rowmap = Json.read(r).asJsonMap();

                            // final getDAOTemplate template, final String
                            // row, final String token, final String
                            // replacementPattern, final Map<String,String>
                            // positions,final String date, final String
                            // table, final String offenderhash
                            if (rowmap.size() > 0) {
                                collect.add(new Break(unescape, repeatkeys, template,
                                        rowmap.get(rowcolumn).asString(), token, replacementPattern, positions,
                                        (Calendar.getInstance().getTime().toString()), targettable,
                                        rowmap.get("offenderhash").asString(), maxpos, genhash));
                            }
                        }
                    } else {
                        Map<String, Json> rowmap = Json.read(r).asJsonMap();

                        // final getDAOTemplate template, final String row,
                        // final String token, final String
                        // replacementPattern, final Map<String,String>
                        // positions,final String date, final String table,
                        // final String offenderhash
                        if (rowmap.size() > 0) {
                            collect.add(new Break(unescape, repeatkeys, template,
                                    rowmap.get(rowcolumn).asString(), token, replacementPattern, positions,
                                    (Calendar.getInstance().getTime().toString()), targettable,
                                    rowmap.get("offenderhash").asString(), maxpos, genhash));
                        }
                    }
                }
            }
        }

        log.info("SUBMITTED " + collect.size() + " tasks");

        futures = fjp.invokeAll(collect);

        w = 0;

        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }

        log.info("Waited for " + w + " Cycles");

        jsons.clear();
        log.info("Getting Strings");
        try {

            for (Future<ArrayList<String>> p : futures) {
                ArrayList<String> retlist = p.get();

                if (retlist != null) {
                    if (retlist.size() > 0) {
                        jsons.addAll(retlist);
                    }

                    if (jsons.size() >= commit_size) {
                        // send to db
                        if (jsons.size() > SPLITSIZE) {
                            log.info("Split True: Sending to DB @ "
                                    + Calendar.getInstance().getTime().toString());

                            postToDb(jsons, true);
                            jsons = new ArrayList<String>();
                            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
                        } else {
                            log.info("Split False: Sending to DB @ "
                                    + Calendar.getInstance().getTime().toString());
                            postToDb(jsons, false);
                            jsons = new ArrayList<String>();
                            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
                        }
                    }
                }
                p.cancel(true);
            }
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (ExecutionException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        futures = null;
        collect = new HashSet<Callable<ArrayList<String>>>();

        // send to db
        if (jsons.size() > SPLITSIZE) {
            log.info("Split True: Sending to DB @" + Calendar.getInstance().getTime().toString());
            postToDb(jsons, true);
            jsons = new ArrayList<String>();
            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
        } else {
            log.info("Split False: Sending to DB @" + Calendar.getInstance().getTime().toString());
            postToDb(jsons, false);
            jsons = new ArrayList<String>();
            log.info("Posted to DB @ " + Calendar.getInstance().getTime().toString());
        }

        // get more information
        rows = new ArrayList<String>();

        if (Runtime.getRuntime().freeMemory() < 500000 || (waitloops != 0 && (loops % waitloops) == 0)) {
            log.info("Paused Free Memory Left: " + Runtime.getRuntime().freeMemory());
            System.gc();
            Runtime.getRuntime().gc();

            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            while (Runtime.getRuntime().freeMemory() < 500000) {
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }

            log.info("Restart Free Memory Left: " + Runtime.getRuntime().freeMemory());
        }

        rows = new ArrayList<String>();

        // attempt to query the database from multiple threads
        for (int conn = 1; conn <= qnum; conn++) {
            // change condition
            condition = " WHERE " + idcolumn + " > "
                    + Integer.toString(offset + (Math.round(pullsize / qnum) * conn)) + " AND " + idcolumn
                    + " <= " + Integer.toString(offset + (Math.round(pullsize / qnum) * (conn + 1)));

            if (extracondition != null) {
                condition += " " + extracondition.trim();
            }

            qcollect.add(new GetFromDB((select + condition), template));
            log.info("SELECTING " + select + " " + condition);
        }

        futures = fjp.invokeAll(qcollect);

        w = 0;

        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }

        log.info("Waited for " + w + " Cycles");

        for (Future<ArrayList<String>> f : futures) {
            try {

                ArrayList<String> test = f.get();

                if (test != null) {
                    if (test.size() > 0) {
                        rows.addAll(test);
                    }
                }

                f.cancel(true);

            } catch (InterruptedException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (ExecutionException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }

        futures = null;
        qcollect = new HashSet<Callable<ArrayList<String>>>(4);

        j++;

        Runtime.getRuntime().gc();
        System.gc();

    }

    // send to db
    if (jsons.size() > SPLITSIZE) {
        log.info("Split True: Sending to DB @" + Calendar.getInstance().getTime().toString());
        postToDb(jsons, true);
        jsons = new ArrayList<String>();
    } else if (jsons.size() > 0) {
        log.info("Split False: Sending to DB @" + Calendar.getInstance().getTime().toString());
        postToDb(jsons, false);
        jsons = new ArrayList<String>();
    }

    Runtime.getRuntime().gc();
    System.gc();

    log.info("Shutting Down Forkjoin Pool");
    if (fjp.isShutdown() == false) {
        fjp.shutdownNow();
    }
}
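
A recurring pattern above is tearing the pool down with shutdownNow() and recreating it when more work arrives. If the intent is simply to stop accepting new tasks and let queued work drain, shutdown() followed by awaitTermination() is the gentler option; a minimal sketch, with an illustrative timeout:

import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;

class PoolShutdownSketch {
    static void closeQuietly(ForkJoinPool fjp) {
        fjp.shutdown(); // stop accepting new tasks; already submitted work still runs
        try {
            if (!fjp.awaitTermination(30, TimeUnit.SECONDS)) {
                fjp.shutdownNow(); // force-cancel anything still running after the timeout
            }
        } catch (InterruptedException e) {
            fjp.shutdownNow();
            Thread.currentThread().interrupt(); // preserve the interrupt status
        }
    }
}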

From source file:com.hygenics.parser.KVParser.java

public void run() {
    log.info("Starting Parse @ " + Calendar.getInstance().getTime().toString());
    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procs);
    Set<Callable<ArrayList<String>>> collection;
    List<Future<ArrayList<String>>> futures;
    ArrayList<String> data = new ArrayList<String>((commitsize + 10));
    ArrayList<String> outdata = new ArrayList<String>(((commitsize + 10) * 3));

    int currpos = 0;
    boolean run = true;

    while (run) {
        log.info("Getting Pages");
        // get pages
        String query = select;

        if (data.size() > 0) {
            data.clear();
        }

        if (extracondition != null) {
            query += " " + extracondition;
        }

        if (extracondition != null) {
            query += " WHERE " + extracondition + " AND ";
        } else {
            query += " WHERE ";
        }

        collection = new HashSet<Callable<ArrayList<String>>>(qnums);
        for (int i = 0; i < qnums; i++) {

            if (currpos + (Math.round(commitsize / qnums * (i + 1))) < currpos + commitsize) {
                collection.add(new SplitQuery((query + pullid + " >= "
                        + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid
                        + " < " + Integer.toString(currpos + (Math.round(commitsize / qnums * (i + 1)))))));
            } else {
                collection.add(new SplitQuery((query + pullid + " >= "
                        + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid
                        + " < " + Integer.toString(currpos + commitsize))));
            }
        }

        currpos += commitsize;

        if (collection.size() > 0) {

            futures = fjp.invokeAll(collection);

            int w = 0;

            while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                w++;
            }

            for (Future<ArrayList<String>> f : futures) {
                try {
                    ArrayList<String> darr = f.get();
                    if (darr != null && darr.size() > 0) {
                        data.addAll(darr);
                    }
                } catch (NullPointerException e) {
                    log.info("Some Data Returned Null");
                } catch (InterruptedException e) {
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    e.printStackTrace();
                }
            }

        }

        if (data.size() == 0 && checkString != null) {
            collection = new HashSet<Callable<ArrayList<String>>>(1);
            collection.add(new SplitQuery(checkString));

            futures = fjp.invokeAll(collection);
            int w = 0;
            while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                w++;
            }

            for (Future<ArrayList<String>> f : futures) {
                try {
                    ArrayList<String> arr = f.get();

                    if (arr != null) {
                        for (String a : arr) {
                            if (a != null) {
                                data.add(a);
                            }
                        }
                    }
                    if (!f.isDone()) {
                        f.cancel(true);
                    }
                    f = null;
                } catch (NullPointerException e) {
                    log.info("Some Data Returned Null");
                } catch (InterruptedException e) {
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    e.printStackTrace();
                }
            }
        }

        // parse pages
        if (data.size() > 0) {
            log.info("Parsing " + Integer.toString(data.size()) + " Records");
            collection = new HashSet<Callable<ArrayList<String>>>(data.size());

            for (String json : data) {
                Map<String, Object> jmap = Json.read(json).asMap();

                // for each table in the tags Map which is a key
                for (String k : tags.keySet()) {

                    collection.add(new Parser(tags.get(k), jmap.get(htmlColumn).toString(), replacePattern,
                            replacement, jmap.get(hashColumn).toString(), hashColumn, k));

                    if (collection.size() + 1 == data.size()
                            || (collection.size() % commitsize == 0 && collection.size() >= commitsize)) {
                        log.info("Waiting for Tasks to Complete");
                        futures = fjp.invokeAll(collection);

                        // post data
                        int w = 0;
                        while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                            w++;
                        }

                        for (Future<ArrayList<String>> future : futures) {
                            try {
                                outdata.addAll(future.get());
                            } catch (NullPointerException e) {
                                log.info("Some Data Returned Null");
                            } catch (InterruptedException e) {
                                e.printStackTrace();
                            } catch (ExecutionException e) {
                                e.printStackTrace();
                            }
                        }

                        log.info("Parsed " + outdata.size() + " records!");
                        // post data
                        int cp = 0;
                        if (outdata.size() > 0) {
                            checkTables(outdata);
                            this.sendToDb(outdata, true);
                            outdata = new ArrayList<String>(commitsize);
                        }

                    }

                }
            }
            data = new ArrayList<String>(commitsize);
        } else {
            log.info("No Records Found. Terminating!");
            run = false;
        }

    }

    if (outdata.size() > 0) {
        log.info("Posting Last Records");
        // post remaining pages for the iteration
        checkTables(outdata);
        this.sendToDb(outdata, true);
        data.clear();
        outdata.clear();
    }

    // shutdown
    log.info("Complete! Shutting Down FJP.");
    fjp.shutdownNow();

    log.info("Finished Parse @ " + Calendar.getInstance().getTime().toString());
}
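
The KVParser and ParseJSoup excerpts both split an id range into qnums sub-queries so that one SplitQuery task per range can be handed to invokeAll. A compact sketch of that chunking, with hypothetical names for the query pieces:

import java.util.ArrayList;
import java.util.List;

class RangeChunkSketch {
    // Splits [start, start + batch) into at most `parts` roughly equal id ranges
    // and builds one WHERE clause per range.
    static List<String> chunkQueries(String baseQuery, String idColumn, int start, int batch, int parts) {
        List<String> queries = new ArrayList<>();
        int step = (int) Math.ceil((double) batch / parts);
        for (int i = 0; i < parts; i++) {
            int lo = start + i * step;
            int hi = Math.min(start + (i + 1) * step, start + batch);
            if (lo >= hi) {
                break; // nothing left to cover
            }
            queries.add(baseQuery + idColumn + " >= " + lo + " AND " + idColumn + " < " + hi);
        }
        return queries;
    }
}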

From source file:com.hygenics.parser.ParseJSoup.java

/**
 * Runs the program.
 */
public void run() {
    int its = 0;

    this.select = Properties.getProperty(this.select);
    this.extracondition = Properties.getProperty(this.extracondition);
    this.column = Properties.getProperty(this.column);

    createTables();
    log.info("Starting Parse via JSoup @ " + Calendar.getInstance().getTime().toString());

    ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procs);
    Set<Callable<ArrayList<String>>> collection;
    List<Future<ArrayList<String>>> futures;
    ArrayList<String> data = new ArrayList<String>((commitsize + 10));
    ArrayList<String> outdata = new ArrayList<String>(((commitsize + 10) * 3));
    int offenderhash = offset;

    boolean run = true;
    int iteration = 0;

    int currpos = 0;
    do {
        collection = new HashSet<Callable<ArrayList<String>>>(qnums);
        log.info("Getting Data");
        // get data
        currpos = iteration * commitsize + offset;
        iteration += 1;
        String query = select;

        if (extracondition != null) {
            query += " " + extracondition;
        }

        if (extracondition != null) {
            query += " WHERE " + extracondition + " AND ";
        } else {
            query += " WHERE ";
        }

        for (int i = 0; i < qnums; i++) {

            if (currpos + (Math.round(commitsize / qnums * (i + 1))) < currpos + commitsize) {
                collection.add(new SplitQuery((query + pullid + " >= "
                        + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid
                        + " < " + Integer.toString(currpos + (Math.round(commitsize / qnums * (i + 1)))))));
            } else {
                collection.add(new SplitQuery((query + pullid + " >= "
                        + Integer.toString(currpos + (Math.round(commitsize / qnums * (i)))) + " AND " + pullid
                        + " < " + Integer.toString(currpos + commitsize))));
            }
        }

        if (collection.size() > 0) {

            futures = fjp.invokeAll(collection);

            int w = 0;

            while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                w++;
            }

            for (Future<ArrayList<String>> f : futures) {
                try {
                    // TODO Get Pages to Parse
                    data.addAll(f.get());
                } catch (NullPointerException e) {
                    log.info("Some Data Returned Null");
                } catch (InterruptedException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }

        }

        collection = new HashSet<Callable<ArrayList<String>>>(data.size());
        // checkstring
        if (data.size() == 0 && checkstring != null && its <= maxchecks) {
            its++;
            collection.add(new SplitQuery(checkstring));

            futures = fjp.invokeAll(collection);

            int w = 0;
            while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                w++;
            }

            for (Future<ArrayList<String>> f : futures) {
                try {
                    // TODO Get Pages to Parse
                    data.addAll(f.get());
                } catch (NullPointerException e) {
                    log.info("Some Data Returned Null");
                } catch (InterruptedException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }

        }

        if (data.size() == 0) {
            // set to stop if size is 0
            log.info("No Pages to Parse. Will Terminate");
            run = false;
        } else {
            // parse
            log.info("Starting JSoup Parse @ " + Calendar.getInstance().getTime().toString());
            for (String json : data) {
                // the faster JSON reader is minimal-json, but the faster parser is Simple JSON
                Map<String, Json> jMap = Json.read(json).asJsonMap();

                if (jMap.containsKey("offenderhash")) {
                    // string to int in case it is a string and has some
                    // extra space
                    offenderhash = Integer.parseInt(jMap.get("offenderhash").asString().trim());
                }

                boolean allow = true;

                if (mustcontain != null) {
                    if (jMap.get(column).asString().contains(mustcontain) == false) {
                        allow = false;
                    }
                }

                if (cannotcontain != null) {
                    if (jMap.get(column).asString().contains(cannotcontain)) {
                        allow = false;
                    }
                }

                // this is the fastest way. I was learning before and will
                // rewrite when time permits.
                if (allow == true) {
                    if (jMap.containsKey("offenderhash")) {
                        if (this.singlepaths != null) {
                            collection.add(new ParseSingle(Integer.toString(offenderhash), header, footer,
                                    pagenarrow, singlepaths,
                                    StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                    replaceSequence));
                        }

                        if (this.multipaths != null) {
                            collection.add(new ParseRows(Integer.toString(offenderhash), header, footer,
                                    pagenarrow, multipaths,
                                    StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                    replaceSequence));
                        }

                        if (this.recordpaths != null) {
                            collection.add(new ParseLoop(Integer.toString(offenderhash), header, footer,
                                    pagenarrow, recordpaths,
                                    StringEscapeUtils.unescapeXml(jMap.get(column).asString()), replace,
                                    replaceSequence));
                        }
                    }
                }
                offenderhash += 1;

            }

            // complete parse
            log.info("Waiting for Parsing to Complete.");
            if (collection.size() > 0) {
                futures = fjp.invokeAll(collection);

                int w = 0;
                while (fjp.isQuiescent() == false && fjp.getActiveThreadCount() > 0) {
                    w++;
                }

                log.info("Waited for " + Integer.toString(w) + " Cycles!");
                for (Future<ArrayList<String>> f : futures) {
                    try {
                        outdata.addAll(f.get());
                    } catch (InterruptedException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    } catch (ExecutionException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }

            }
            log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString());

            int cp = 0;
            // post data
            log.info("Posting Data @ " + Calendar.getInstance().getTime().toString());
            if (outdata.size() > 0) {

                for (int i = 0; i < qnums; i++) {

                    ArrayList<String> od = new ArrayList<String>(
                            ((cp + (Math.round(outdata.size() / qnums) - cp))));

                    if (cp + (Math.round(outdata.size() / qnums)) < outdata.size()) {
                        od.addAll(outdata.subList(cp, (cp + (Math.round(outdata.size() / qnums)))));
                    } else {
                        od.addAll(outdata.subList(cp, outdata.size()));
                    }
                    fjp.execute(new SplitPost(template, od));
                    cp += Math.round(outdata.size() / qnums);
                }

                int w = 0;
                while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                    w++;
                }
                log.info("Waited for " + Integer.toString(w) + " cycles!");

            }
            log.info("Finished Posting to DB @ " + Calendar.getInstance().getTime().toString());

            // size should remain same with 10 slot buffer room
            data.clear();
            outdata.clear();
        }

        // my favorite really desperate attempt to actually invoke garbage
        // collection because of MASSIVE STRINGS
        System.gc();
        Runtime.getRuntime().gc();

    } while (run);

    log.info("Shutting Down FJP");
    // shutdown fjp
    if (fjp.isShutdown() == false) {
        fjp.shutdownNow();
    }

    log.info("Finished Parsing @ " + Calendar.getInstance().getTime().toString());

}
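
The ParseDispatcher excerpt below also bounds how long it waits on each hashing result by using the timed form of Future.get. A minimal sketch of that pattern, with an illustrative timeout and placeholder task:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

class TimedGetSketch {
    static String getWithTimeout(ForkJoinPool fjp, Callable<String> task, long timeoutMs) {
        Future<String> f = fjp.submit(task);
        try {
            return f.get(timeoutMs, TimeUnit.MILLISECONDS); // wait at most timeoutMs
        } catch (TimeoutException e) {
            f.cancel(true); // give up on a slow task
            return null;
        } catch (InterruptedException | ExecutionException e) {
            return null;
        }
    }
}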

From source file:com.hygenics.parser.ParseDispatcher.java

/**
 * Fork/Join Pool Solution Maximizes Speed. JSON increases ease of use.
 */
public void run() {
    log.info("Starting Clock and Parsing @" + Calendar.getInstance().getTime().toString());
    long t = Calendar.getInstance().getTimeInMillis();
    int pid = 0;
    int id = 0;
    int checkattempts = 0;
    String add = null;

    this.schema = Properties.getProperty(this.schema);
    this.select = Properties.getProperty(this.select);
    this.extracondition = Properties.getProperty(this.extracondition);
    this.column = Properties.getProperty(this.column);

    ArrayList<String> parsedrows = new ArrayList<String>();

    Set<Callable<String>> collect = new HashSet<Callable<String>>();
    List<Future<String>> futures;

    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);

    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));

    if (schema != null) {
        createTables();
    }

    boolean run = true;
    String condition;
    int w = 0;
    int start = offset;
    int chunksize = (int) Math.ceil((double) pullsize / qnum);

    // attempt to query the database from multiple threads
    do {
        // query for pages
        pages = new ArrayList<String>(pullsize);
        log.info("Looking for Pages.");
        for (int conn = 0; conn < qnum; conn++) {
            // create condition
            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND " + pullid + " < "
                    + Integer.toString(start + (chunksize * (conn + 1)));

            if (extracondition != null) {
                condition += " " + extracondition.trim();
            }

            // get queries
            qcollect.add(new SplitQuery(template, (select + condition)));
            log.info("Fetching " + select + condition);
        }
        start += (chunksize * qnum);

        qfutures = fjp.invokeAll(qcollect);

        w = 0;
        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }
        log.info("Waited for " + w + " cycles");

        for (Future<ArrayList<String>> f : qfutures) {
            try {

                ArrayList<String> test = f.get();
                if (test != null) {
                    if (test.size() > 0) {
                        pages.addAll(test);
                    }
                }

                if (f.isDone() == false) {
                    f.cancel(true);
                }

                f = null;
            } catch (Exception e) {
                log.warn("Encoding Error!");
                e.printStackTrace();
            }
        }
        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
        qfutures = null;
        log.info("Finished Getting Pages");

        // if no records then get records that may have been dropped
        if (pages.size() == 0 && checkstring != null && checkstring.trim().length() > 0
                && checkattempts < reattempts) {
            checkattempts += 1;
            log.info("Checking for Drops");
            qcollect.add(new SplitQuery(template, (checkstring)));
            qfutures = fjp.invokeAll(qcollect);

            w = 0;
            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                w++;
            }
            log.info("Waited for " + w + " cycles");

            for (Future<ArrayList<String>> f : qfutures) {
                try {

                    ArrayList<String> test = f.get();
                    if (test != null) {
                        if (test.size() > 0) {
                            pages.addAll(test);
                        }
                    }

                    if (f.isDone() == false) {
                        f.cancel(true);
                    }

                    f = null;
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            qfutures = null;
            qcollect = new HashSet<Callable<ArrayList<String>>>(4);

        } else if (checkattempts >= reattempts) {
            pages.clear();
        }

        log.info("Found " + pages.size() + " records!");

        // get hashes if necessary
        if (getHash) {
            log.info("Hashing " + pages.size() + " Records");

            ArrayList<String> hashedrows = new ArrayList<String>();
            for (String row : pages) {

                collect.add(new CreateHash(row, pid));
                pid++;

            }

            log.info("Invoking");
            futures = fjp.invokeAll(collect);

            w = 0;
            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                w++;
            }

            log.info("Waited " + w + " Cycles!");

            for (Future<String> f : futures) {
                if (f != null) {
                    String json;
                    try {
                        json = f.get(termtime, TimeUnit.MILLISECONDS);

                        if (json != null) {
                            hashedrows.add(json);
                        }

                    } catch (Exception e) {
                        log.warn("Encoding Error!");
                        e.printStackTrace();
                    }
                }

            }
            log.info("Hashed " + hashedrows.size() + " Records!");
            pages = hashedrows;

            collect = new HashSet<Callable<String>>(pullsize);
            futures.clear();
            log.info("Completed Hashing");
        }

        log.info("Performing Regex");
        // handle single patterns
        int i = 0;
        if (singlepats != null) {

            log.info("Found Singlepats");
            int subs = 0;
            int rows = 0;
            for (String row : pages) {
                rows += 1;
                String inrow = row;
                try {

                    inrow = inrow.replaceAll("\t|\r|\r\n|\n", "");

                    Map<String, Json> jmap = Json.read(inrow).asJsonMap();

                    if (singlepats.containsKey("table")) {
                        subs += 1;

                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool((Runtime.getRuntime().availableProcessors() * procnum));
                        }

                        if (jmap.get(column) != null) {

                            if (test) {
                                System.out.println("//////////////////////HTML////////////////////////\n"
                                        + jmap.get(column).asString()
                                        + "\n///////////////////////////////END///////////////////////////\n\n");
                            }

                            if (mustcontain != null) {
                                if (jmap.get(column).asString().contains(mustcontain)) {
                                    if (cannotcontain != null) {
                                        if (jmap.get(column).asString().contains(cannotcontain) == false)
                                            collect.add(new ParsePage(unescape, replacementPattern,
                                                    singlepats.get("table"),
                                                    jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                    singlepats, Calendar.getInstance().getTime().toString(),
                                                    jmap.get("offenderhash").asString()));
                                    } else {
                                        collect.add(new ParsePage(unescape, replacementPattern,
                                                singlepats.get("table"),
                                                jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                singlepats, Calendar.getInstance().getTime().toString(),
                                                jmap.get("offenderhash").asString()));
                                    }
                                }
                            } else if (cannotcontain != null) {
                                if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                    collect.add(
                                            new ParsePage(unescape, replacementPattern, singlepats.get("table"),
                                                    jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                    singlepats, Calendar.getInstance().getTime().toString(),
                                                    jmap.get("offenderhash").asString()));
                                }
                            } else {
                                collect.add(new ParsePage(unescape, replacementPattern, singlepats.get("table"),
                                        jmap.get(column).asString().replaceAll("\\s\\s", " "), singlepats,
                                        Calendar.getInstance().getTime().toString(),
                                        jmap.get("offenderhash").asString()));
                            }
                        }
                    }
                    i++;

                    if (((i % commit_size) == 0 && i != 0) || i == pages.size()
                            || pages.size() == 1 && singlepats != null) {
                        log.info("Getting Regex Results");

                        log.info("Getting Tasks");

                        futures = fjp.invokeAll(collect);

                        w = 0;

                        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                            w++;
                        }

                        log.info("Waited for " + w + " cycles");

                        for (Future<String> r : futures) {
                            try {

                                add = r.get();
                                if (add.contains("No Data") == false) {
                                    parsedrows.add(add);
                                }

                                add = null;

                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }

                        futures = null;
                        collect = new HashSet<Callable<String>>();

                        if (parsedrows.size() >= commit_size) {
                            log.info("INSERTING " + parsedrows.size() + " records!");
                            if (parsedrows.size() >= SPLITSIZE) {
                                sendToDb(parsedrows, true);
                            } else {
                                sendToDb(parsedrows, false);
                            }

                            parsedrows = new ArrayList<String>(pullsize);
                        }

                        // hint to the gc in case it actually pays off; use
                        // -X:compactexplicitgc to improve odds and
                        // -XX:UseConcMarkSweepGC for improving odds on
                        // older generation strings
                        // (think if i were a gambling man)
                        System.gc();
                        Runtime.getRuntime().gc();
                    }
                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }
            log.info("Submitted " + subs + " records. Found " + rows + " rows");
        }

        log.info("REMAINING ROWS TO COMMIT " + parsedrows.size());
        log.info("Rows Left" + parsedrows.size());
        if (parsedrows.size() > 0) {

            if (parsedrows.size() >= SPLITSIZE) {
                sendToDb(parsedrows, true);
            } else {
                sendToDb(parsedrows, false);
            }

            parsedrows = new ArrayList<String>();
        }

        // handle multi patterns
        if (multipats != null) {
            // parse multiple pages for the run
            int subs = 0;
            for (String row : pages) {
                try {
                    for (String k : multipats.keySet()) {
                        if (fjp.isShutdown()) {

                            fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
                        }

                        Map<String, Json> jmap = Json.read(row).asJsonMap();

                        if (jmap.get(column) != null) {
                            subs += 1;
                            if (test) {
                                System.out.println("//////////////////////HTML////////////////////////\n"
                                        + jmap.get(column).asString()
                                        + "\n///////////////////////////////END///////////////////////////\n\n");
                            }

                            if (mustcontain != null) {
                                if (jmap.get(column).asString().contains(mustcontain)) {
                                    if (cannotcontain != null) {
                                        if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                            collect.add(
                                                    new ParseMultiPage(unescape, replacementPattern, k,
                                                            jmap.get(column).asString().replaceAll("\\s\\s",
                                                                    " "),
                                                            jmap.get("offenderhash").asString(),
                                                            Calendar.getInstance().getTime().toString(),
                                                            multipats.get(k)));
                                        }
                                    } else {
                                        collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                                jmap.get(column).asString(),
                                                jmap.get("offenderhash").asString().replaceAll("\\s\\s", " "),
                                                Calendar.getInstance().getTime().toString(), multipats.get(k)));
                                    }
                                }
                            } else if (cannotcontain != null) {
                                if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                    collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                            jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                            jmap.get("offenderhash").asString(),
                                            Calendar.getInstance().getTime().toString(), multipats.get(k)));
                                }

                            } else {
                                collect.add(new ParseMultiPage(unescape, replacementPattern, k,
                                        jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                        jmap.get("offenderhash").asString(),
                                        Calendar.getInstance().getTime().toString(), multipats.get(k)));
                            }
                        }

                        i++;
                        if (((i % commit_size) == 0 && i != 0) || i == pages.size()
                                || pages.size() == 1 && multipats != null) {
                            futures = fjp.invokeAll(collect);
                            w = 0;
                            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                                w++;
                            }

                            log.info("Waited " + w + " Cycles");

                            for (Future<String> r : futures) {
                                try {
                                    add = r.get();

                                    if (add.contains("No Data") == false) {

                                        for (String js : add.split("~")) {
                                            parsedrows.add(js);
                                        }
                                    }
                                    add = null;

                                    if (r.isDone() == false) {
                                        r.cancel(true);
                                    }
                                    r = null;

                                } catch (InterruptedException e) {
                                    // TODO Auto-generated catch block
                                    e.printStackTrace();
                                } catch (ExecutionException e) {
                                    // TODO Auto-generated catch block
                                    e.printStackTrace();
                                }
                            }

                            futures = null;
                            collect = new HashSet<Callable<String>>();

                            if (parsedrows.size() >= commit_size) {
                                log.info("INSERTING " + parsedrows.size() + " records!");
                                if (parsedrows.size() >= SPLITSIZE) {
                                    sendToDb(parsedrows, true);
                                } else {
                                    sendToDb(parsedrows, false);
                                }
                                parsedrows = new ArrayList<String>(pullsize);
                            }

                            // hint to the gc in case it actually pays off
                            System.gc();
                            Runtime.getRuntime().gc();
                        }
                    }

                } catch (Exception e) {
                    log.warn("Encoding Error!");
                }

            }
            log.info("Submitted " + subs + " records.");
        }

        // handle looped patterns
        if (loopedpats != null) {
            log.info("Looped Patterns Found");
            int subs = 0;
            if (fjp.isShutdown()) {
                fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
            }

            for (String row : pages) {
                try {

                    for (String k : loopedpats.keySet()) {
                        if (fjp.isShutdown()) {
                            fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors() * procnum);
                        }
                        Map<String, Json> jmap = Json.read(row).asJsonMap();

                        if (jmap.get(column) != null) {
                            subs += 1;
                            if (mustcontain != null) {
                                if (jmap.get(column).asString().contains(mustcontain)) {
                                    if (cannotcontain != null) {
                                        if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                            collect.add(
                                                    new LoopRegex(unescape,
                                                            jmap.get(column).asString().replaceAll("\\s\\s",
                                                                    " "),
                                                            jmap.get("offenderhash").asString(),
                                                            Calendar.getInstance().getTime().toString(), k,
                                                            replacementPattern, loopedpats.get(k), test));
                                        }
                                    } else {
                                        collect.add(new LoopRegex(unescape,
                                                jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                                jmap.get("offenderhash").asString(),
                                                Calendar.getInstance().getTime().toString(), k,
                                                replacementPattern, loopedpats.get(k), test));
                                    }
                                }
                            } else if (cannotcontain != null) {
                                if (jmap.get(column).asString().contains(cannotcontain) == false) {
                                    collect.add(new LoopRegex(unescape,
                                            jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                            jmap.get("offenderhash").asString(),
                                            Calendar.getInstance().getTime().toString(), k, replacementPattern,
                                            loopedpats.get(k), test));
                                }
                            } else {
                                collect.add(new LoopRegex(unescape,
                                        jmap.get(column).asString().replaceAll("\\s\\s", " "),
                                        jmap.get("offenderhash").asString(),
                                        Calendar.getInstance().getTime().toString(), k, replacementPattern,
                                        loopedpats.get(k), test));
                            }
                            jmap.remove(k);
                        }
                        i++;
                        // check pages.size() == 1 before the modulo to avoid dividing by zero
                        if (((i % commit_size) == 0 && i != 0) || pages.size() == 1
                                || (i % (pages.size() - 1)) == 0) {

                            futures = fjp.invokeAll(collect);

                            w = 0;

                            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                                w++;
                            }
                            log.info("Waited " + w + " Cycles");

                            for (Future<String> r : futures) {
                                try {
                                    add = r.get();
                                    if (add.contains("No Data") == false) {
                                        for (String toarr : add.split("~")) {
                                            parsedrows.add(toarr);
                                        }
                                    }

                                    if (r.isDone() == false) {
                                        r.cancel(true);
                                    }
                                    add = null;

                                } catch (Exception e) {
                                    log.warn("Encoding Error!");
                                    e.printStackTrace();
                                }
                            }

                            futures = null;
                            collect = new HashSet<Callable<String>>();

                            // hint to the gc in case it actually pays off
                            System.gc();
                            Runtime.getRuntime().gc();
                        }
                    }

                    if (parsedrows.size() >= this.commit_size) {
                        log.info("INSERTING " + parsedrows.size() + " records!");
                        if (parsedrows.size() >= SPLITSIZE) {
                            sendToDb(parsedrows, true);
                        } else {
                            sendToDb(parsedrows, false);
                        }

                        parsedrows = new ArrayList<String>(pullsize);
                    }

                } catch (Exception e) {
                    log.warn("Encoding Error!");
                }
            }
            log.info("Submitted " + subs + " records.");
        }

        if (collect.size() > 0) {
            log.info("Getting Last Regex Results for Iteration");

            log.info("Getting Tasks");

            futures = fjp.invokeAll(collect);

            w = 0;

            while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                w++;
            }

            log.info("Waited for " + w + " cycles");

            for (Future<String> r : futures) {
                try {

                    add = r.get();
                    if (add.contains("No Data") == false) {
                        parsedrows.add(add);
                    }

                    add = null;

                } catch (Exception e) {
                    log.warn("Encoding Error!");
                    e.printStackTrace();
                }
            }

            futures = null;
            collect = new HashSet<Callable<String>>(pullsize);
            // hint to the gc in case it actually pays off; -Xcompactexplicitgc
            // (IBM J9) and -XX:+UseConcMarkSweepGC (HotSpot) may improve the odds
            // of reclaiming older generation strings (think: if I were a gambling man)
            System.gc();
            Runtime.getRuntime().gc();
        }

        log.info("REMAINING ROWS TO COMMIT " + parsedrows.size());
        log.info("Rows Left" + parsedrows.size());
        if (parsedrows.size() > 0) {

            if (parsedrows.size() >= SPLITSIZE) {
                sendToDb(parsedrows, true);
            } else {
                sendToDb(parsedrows, false);
            }

            parsedrows = new ArrayList<String>();
        }

    } while (pages != null && pages.size() > 0);

    // ensure that nothing is still caught in limbo
    // final parser to ensure that nothing is left out
    if (collect.size() > 0) {
        log.info("More Rows Caught in FJP, Completing Process");
        futures = fjp.invokeAll(collect);

        w = 0;

        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
            w++;
        }
        log.info("Waited " + w + " Cycles");

        for (Future<String> r : futures) {
            try {
                add = r.get();

                if (add.contains("No Data") == false) {

                    for (String js : add.split("~")) {
                        parsedrows.add(js);
                    }
                }
                add = null;

                if (r.isDone() == false) {
                    r.cancel(true);
                }
                r = null;

            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ExecutionException e) {
                e.printStackTrace();
            }
        }

        futures = null;
        collect = null;
    }

    // send any remaining parsed rows to the db
    if (parsedrows.size() > 0) {

        if (parsedrows.size() >= SPLITSIZE) {
            sendToDb(parsedrows, true);
        } else {
            sendToDb(parsedrows, false);
        }

        parsedrows = new ArrayList<String>();
    }

    log.info("Shutting Down Fork Join Pool");
    if (fjp.isShutdown() == false) {
        fjp.shutdownNow();
    }

    fjp = null;

    log.info("Complete @" + Calendar.getInstance().getTime().toString());
    log.info("Total Runtime(seconds): "
            + Double.toString((double) (Calendar.getInstance().getTimeInMillis() - t) / 1000));

    // hint to the gc in case it actually pays off
    System.gc();
    Runtime.getRuntime().gc();
}
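
The listing above drives each commit-sized batch through fjp.invokeAll(collect) and then spins on getActiveThreadCount()/isQuiescent() before reading the futures. Because invokeAll blocks until every submitted task has completed, the futures are already done when it returns and the spin loop only counts cycles for the log message. Below is a minimal, self-contained sketch of the same submit-and-collect pattern; the lambda bodies are hypothetical stand-ins for the ParseMultiPage/LoopRegex tasks and are not part of the original source.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;

public class InvokeAllBatchSketch {
    public static void main(String[] args) throws Exception {
        ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
        Set<Callable<String>> collect = new HashSet<Callable<String>>();

        // hypothetical stand-ins for the regex parsing tasks built above
        for (int i = 0; i < 8; i++) {
            final int id = i;
            collect.add(() -> "row-" + id + "~row-" + id + "b");
        }

        // invokeAll blocks until every task has run, so no extra waiting is needed
        List<Future<String>> futures = fjp.invokeAll(collect);

        List<String> parsedrows = new ArrayList<String>();
        for (Future<String> f : futures) {
            String add = f.get(); // already complete; returns immediately
            if (!add.contains("No Data")) {
                for (String js : add.split("~")) {
                    parsedrows.add(js);
                }
            }
        }

        fjp.shutdownNow();
        System.out.println(parsedrows.size() + " rows parsed");
    }
}

The same shape repeats for every batch in the listing: build a fresh Set of Callables, hand it to invokeAll, drain the futures, then reset the set before the next chunk.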

From source file:com.hygenics.parser.JDump.java

private void toFile() {
    ArrayList<String> archs = new ArrayList<String>();
    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);

    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));

    int dumped = 0;

    if (archive) {
        log.info("Cleaning");
        for (String k : fpaths.keySet()) {
            String fpath = "";

            for (String ofp : fpaths.get(k).keySet()) {
                fpath = ofp;
            }

            if (fpath.length() > 0) {
                String[] barr = fpath.split("\\/");
                String basefile = "";
                Archiver zip = new Archiver();
                // rebuild the parent directory path, dropping the trailing file name
                for (int i = 0; i < barr.length - 1; i++) {
                    basefile += (i == 0) ? barr[i] : "/" + barr[i];
                }
                if (basefile.trim().length() > 0) {
                    zip.setBasedirectory(basefile);
                    zip.setZipDirectory(basefile + "archive.zip");
                    zip.setAvoidanceString(".zip|archive");
                    zip.setDelFiles(true);
                    zip.run();
                }
            }
        }
    }

    log.info("Dumping");
    for (String table : fpaths.keySet()) {
        int offset = 0;
        if (template.checkTable(table, table.split("\\.")[0])) {
            if (template.getCount(table) > 0) {
                log.info("Dumping for " + table);
                // get header
                String select = "SELECT * FROM " + table;
                String fpath = null;
                ArrayList<String> jsons;
                String condition;
                int w = 0;
                int start = offset;
                int chunksize = (int) Math.ceil((double) pullsize / qnum);

                // get fpath
                for (String ofp : fpaths.get(table).keySet()) {
                    start = fpaths.get(table).get(ofp);
                    fpath = ofp;
                }

                // perform write
                if (headers != null && fpath != null) {
                    List<String> headersList = headers.get(table);

                    String output = null;
                    boolean existed = true;

                    if (addFileDate) {
                        fpath = fpath
                                + Calendar.getInstance().getTime().toString().trim().replaceAll(":|\\s", "")
                                + ".txt";
                    }

                    // check to see if file should be created
                    if (!new File(fpath).exists()) {

                        try {
                            new File(fpath).createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        existed = false;
                    }

                    // check to see if file must be recreated
                    if (!append) {

                        File f = new File(fpath);
                        f.delete();
                        try {
                            f.createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }

                    if (headersList != null && (append == false || existed == false)) {
                        for (String header : headersList) {
                            output = (output == null) ? StringEscapeUtils.unescapeXml(header)
                                    : output + delimeter + StringEscapeUtils.unescapeXml(header);
                        }
                    }

                    do {

                        // get records
                        jsons = new ArrayList<String>(pullsize);
                        log.info("Looking for Pages.");
                        for (int conn = 0; conn < qnum; conn++) {
                            // create condition
                            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND "
                                    + pullid + " < " + Integer.toString(start + (chunksize * (conn + 1)));

                            if (extracondition != null) {
                                condition += " " + extracondition.trim();
                            }

                            // get queries
                            qcollect.add(new SplitQuery(template, (select + condition)));
                            log.info("Fetching " + select + condition);
                        }

                        start += (chunksize * qnum);

                        qfutures = fjp.invokeAll(qcollect);

                        w = 0;
                        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                            w++;
                        }
                        log.info("Waited for " + w + " cycles");

                        for (Future<ArrayList<String>> f : qfutures) {
                            try {

                                ArrayList<String> test = f.get();
                                if (test != null) {
                                    if (test.size() > 0) {
                                        jsons.addAll(test);
                                    }
                                }

                                if (f.isDone() == false) {
                                    f.cancel(true);
                                }

                                f = null;
                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }
                        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
                        qfutures = null;
                        log.info("Finished Getting Pages");

                        // post records to the file
                        try (FileWriter fw = new FileWriter(new File(fpath), true)) {
                            // get and write headers

                            if (jsons.size() > 0) {
                                fw.write(output + "\n");
                                // write data
                                for (String json : jsons) {
                                    output = null;
                                    JsonObject jo = JsonObject.readFrom(json);
                                    if (jo.size() >= headersList.size()) { // allows trimming of table to key aspects
                                        output = null;

                                        for (String key : headers.get(table)) {

                                            if (jo.get(key.toLowerCase()) != null) {
                                                String data = StringEscapeUtils
                                                        .unescapeXml(jo.get(key.toLowerCase()).asString());

                                                if (replacementPattern != null) {
                                                    data = data.replaceAll(replacementPattern, "");
                                                    data = data.replace(delimeter, delimreplace);
                                                }

                                                output = (output == null)
                                                        ? data.replaceAll("[^\u0020-\u007E ]+", "")
                                                        : output + delimeter
                                                                + data.replaceAll("[^\u0020-\u007E ]+", "");
                                            } else {
                                                output += delimeter;
                                            }
                                        }

                                        if (output != null && output.trim().length() > headersList.size()) {
                                            fw.write(output + "\n");
                                        }
                                    } else {
                                        if (jsons.size() == 0) {
                                            log.info(
                                                    "Number of Headers and Keys from Json Array and Headers List Impossible to Match");
                                            try {
                                                throw new MismatchException(
                                                        "Number of Headers: " + headersList.size()
                                                                + " && Number of Keys: " + jo.size());
                                            } catch (MismatchException e) {
                                                e.printStackTrace();
                                            }
                                        }
                                    }

                                    output = null;
                                }
                            } else {
                                log.info("EOF FOUND! No New Records in This Iteration....Stopping.");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                    } while (jsons.size() > 0);

                } else {
                    try {
                        throw new NullPointerException(
                                "No Headers Input to Class. Please Create the Requisite Map.");
                    } catch (NullPointerException e) {
                        e.printStackTrace();
                    }
                }
                dumped += 1;
            } else {
                try {
                    throw new NoDataException("No Data Found in Table " + table);
                } catch (NoDataException e) {
                    e.printStackTrace();
                }
            }
        } else {
            log.info("Missing Table " + table);
            try {
                throw new NullPointerException("Table " + table + " Does Not Exist!!!");
            } catch (NullPointerException e) {
                e.printStackTrace();
            }
        }
    } // end LOOP

    if (!fjp.isShutdown()) {
        fjp.shutdownNow();
    }

    if (dumped == 0) {
        log.error("No Data Found in Any Table");
        System.exit(-1);
    }
}
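
JDump.toFile() fans each pull out as qnum range-bounded SELECT statements, submits them as a single invokeAll batch, and merges the returned lists before writing the delimited file. The sketch below isolates that fan-out/merge step; fetch(String sql) is a hypothetical stand-in for the SplitQuery task and its template connection, neither of which is shown on this page.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;

public class ChunkedPullSketch {
    // hypothetical stand-in for SplitQuery: returns one fake JSON row per query
    static ArrayList<String> fetch(String sql) {
        ArrayList<String> rows = new ArrayList<String>();
        rows.add("{\"sql\":\"" + sql + "\"}");
        return rows;
    }

    public static void main(String[] args) throws Exception {
        ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
        int qnum = 4;
        int pullsize = 1000;
        int start = 0;
        int chunksize = (int) Math.ceil((double) pullsize / qnum);

        Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>();
        for (int conn = 0; conn < qnum; conn++) {
            String condition = " WHERE id >= " + (start + conn * chunksize) + " AND id < "
                    + (start + chunksize * (conn + 1));
            String sql = "SELECT * FROM my_table" + condition;
            qcollect.add(() -> fetch(sql));
        }

        // one blocking batch; every future is complete when invokeAll returns
        List<Future<ArrayList<String>>> qfutures = fjp.invokeAll(qcollect);

        ArrayList<String> jsons = new ArrayList<String>(pullsize);
        for (Future<ArrayList<String>> f : qfutures) {
            ArrayList<String> part = f.get();
            if (part != null && !part.isEmpty()) {
                jsons.addAll(part);
            }
        }

        fjp.shutdownNow();
        System.out.println("pulled " + jsons.size() + " rows");
    }
}

Splitting one id range across qnum concurrent queries is what keeps the pool busy; invokeAll simply provides the barrier that lets the file writer run against a complete chunk.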

From source file:com.hygenics.parser.JDumpWithReference.java

private void toFile() {
    List<Future<ArrayList<String>>> qfutures;
    Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4);

    ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum));
    int dumped = 0;

    if (archive) {
        log.info("Cleaning");
        for (String k : fpaths.keySet()) {
            String fpath = "";

            for (String ofp : fpaths.get(k).keySet()) {
                fpath = ofp;
            }

            if (fpath.length() > 0) {
                String[] barr = fpath.split("\\/");
                String basefile = "";
                Archiver zip = new Archiver();
                // rebuild the parent directory path, dropping the trailing file name
                for (int i = 0; i < barr.length - 1; i++) {
                    basefile += (i == 0) ? barr[i] : "/" + barr[i];
                }
                if (basefile.trim().length() > 0) {
                    zip.setBasedirectory(basefile);
                    zip.setZipDirectory(basefile + "archive.zip");
                    zip.setAvoidanceString(".zip|archive");
                    zip.setDelFiles(true);
                    zip.run();
                }
            }
        }
    }

    log.info("Dumping");
    for (String table : fpaths.keySet()) {
        int offset = 0;
        if (template.checkTable(this.baseschema + "." + table, this.baseschema)) {
            if (template.getCount(this.baseschema + "." + table) > 0) {
                log.info("Dumping for " + table);
                // get header
                String select = "SELECT * FROM " + this.baseschema + "." + table;
                String fpath = null;
                ArrayList<String> jsons;
                String condition;
                int w = 0;
                int start = offset;
                int chunksize = (int) Math.ceil((double) pullsize / qnum);

                // get fpath
                for (String ofp : fpaths.get(table).keySet()) {
                    start = fpaths.get(table).get(ofp);
                    fpath = ofp;
                }

                // perform write
                if (headers != null && fpath != null) {
                    List<String> headersList = headers.get(table);

                    String output = null;
                    boolean existed = true;

                    if (addFileDate) {
                        fpath = fpath
                                + Calendar.getInstance().getTime().toString().trim().replaceAll(":|\\s", "")
                                + ".txt";
                    }

                    // check to see if file should be created
                    if (!new File(this.baseFilePath + fpath).exists()) {

                        try {
                            new File(this.baseFilePath + fpath).createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        existed = false;
                    }

                    // check to see if file must be recreated
                    if (!append) {

                        File f = new File(this.baseFilePath + fpath);
                        f.delete();
                        try {
                            f.createNewFile();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }

                    if (headersList != null && (append == false || existed == false)) {
                        for (String header : headersList) {
                            output = (output == null) ? StringEscapeUtils.unescapeXml(header)
                                    : output + delimeter + StringEscapeUtils.unescapeXml(header);
                        }
                    }

                    do {

                        // get records
                        jsons = new ArrayList<String>(pullsize);
                        log.info("Looking for Pages.");
                        for (int conn = 0; conn < qnum; conn++) {
                            // create condition
                            condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND "
                                    + pullid + " < " + Integer.toString(start + (chunksize * (conn + 1)));

                            if (extracondition != null) {
                                condition += " " + extracondition.trim();
                            }

                            // get queries
                            qcollect.add(new SplitQuery(template, (select + condition)));
                            log.info("Fetching " + select + condition);
                        }

                        start += (chunksize * qnum);

                        qfutures = fjp.invokeAll(qcollect);

                        w = 0;
                        while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) {
                            w++;
                        }
                        log.info("Waited for " + w + " cycles");

                        for (Future<ArrayList<String>> f : qfutures) {
                            try {

                                ArrayList<String> test = f.get();
                                if (test != null) {
                                    if (test.size() > 0) {
                                        jsons.addAll(test);
                                    }
                                }

                                if (f.isDone() == false) {
                                    f.cancel(true);
                                }

                                f = null;
                            } catch (Exception e) {
                                log.warn("Encoding Error!");
                                e.printStackTrace();
                            }
                        }
                        qcollect = new HashSet<Callable<ArrayList<String>>>(4);
                        qfutures = null;
                        log.info("Finished Getting Pages");

                        // post records to the file
                        try (FileWriter fw = new FileWriter(new File(this.baseFilePath + fpath), true)) {
                            // get and write headers

                            if (jsons.size() > 0) {
                                fw.write(output + "\n");
                                // write data
                                for (String json : jsons) {
                                    output = null;
                                    JsonObject jo = JsonObject.readFrom(json);
                                    if (jo.size() >= headersList.size()) { // allows trimming of table to key aspects
                                        output = null;

                                        for (String key : headers.get(table)) {

                                            if (jo.get(key.toLowerCase()) != null) {
                                                String data = StringEscapeUtils
                                                        .unescapeXml(jo.get(key.toLowerCase()).asString());

                                                if (replacementPattern != null) {
                                                    data = data.replaceAll(replacementPattern, "");
                                                    data = data.replace(delimeter, delimreplace);
                                                }

                                                output = (output == null)
                                                        ? data.replaceAll("[^\u0020-\u007E ]+", "")
                                                        : output + delimeter
                                                                + data.replaceAll("[^\u0020-\u007E ]+", "");
                                            } else {
                                                output += delimeter;
                                            }
                                        }

                                        if (output != null && output.trim().length() > headersList.size()) {
                                            fw.write(output + "\n");
                                        }
                                    } else {
                                        if (jsons.size() == 0) {
                                            log.info(
                                                    "Number of Headers and Keys from Json Array and Headers List Impossible to Match");
                                            try {
                                                throw new MismatchException(
                                                        "Number of Headers: " + headersList.size()
                                                                + " && Number of Keys: " + jo.size());
                                            } catch (MismatchException e) {
                                                e.printStackTrace();
                                            }
                                        }
                                    }

                                    output = null;
                                }
                            } else {
                                log.info("EOF FOUND! No New Records in This Iteration....Stopping.");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                    } while (jsons.size() > 0);

                } else {
                    try {
                        throw new NullPointerException(
                                "No Headers Input to Class. Please Create the Requisite Map.");
                    } catch (NullPointerException e) {
                        e.printStackTrace();
                    }
                }
                dumped += 1;
            } else {
                try {
                    throw new NoDataException("No Data in Table " + table);
                } catch (NoDataException e) {
                    e.printStackTrace();
                }
            }
        } else {
            log.info("Missing Table " + table);
            try {
                throw new NullPointerException("Table " + table + " Does Not Exist!!!");
            } catch (NullPointerException e) {
                e.printStackTrace();
            }
        }
    } // end LOOP

    if (!fjp.isShutdown()) {
        fjp.shutdownNow();
    }

    if (dumped == 0) {
        log.error("No Data found in Any Tables");
        System.exit(-1);
    }
}
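
Both dump variants create the ForkJoinPool at the top of toFile() and call shutdownNow() at the bottom, after every invokeAll batch has already returned. When a pool is scoped to a single method like this, a try/finally (or try-with-resources on JDK 19+, where ExecutorService implements AutoCloseable) guarantees the shutdown even if a task or Future.get() throws. A short sketch of that shape, under the same assumptions:

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;

public class PoolLifecycleSketch {
    public static void main(String[] args) throws Exception {
        ForkJoinPool fjp = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
        try {
            Set<Callable<String>> tasks = new HashSet<Callable<String>>();
            for (int i = 0; i < 4; i++) {
                final int n = i;
                tasks.add(() -> "chunk-" + n); // hypothetical work item
            }

            List<Future<String>> futures = fjp.invokeAll(tasks);
            for (Future<String> f : futures) {
                System.out.println(f.get()); // already complete after invokeAll
            }
        } finally {
            // the pool is torn down even if a task or get() throws
            if (!fjp.isShutdown()) {
                fjp.shutdownNow();
            }
        }
    }
}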