Example usage for java.util.concurrent ThreadPoolExecutor isTerminated

List of usage examples for java.util.concurrent ThreadPoolExecutor isTerminated

Introduction

In this page you can find the example usage for java.util.concurrent ThreadPoolExecutor isTerminated.

Prototype

public boolean isTerminated() 

Source Link

Usage

From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java

@App("Creates tokenized files")
public void tokenizeBig_files_onSource() throws IOException, InterruptedException {
    final StopWatch taskStopWatch = new StopWatch();
    taskStopWatch.start();//  w  w w. ja  v a2  s. c  o  m

    final File parentFolder = new File("D:\\devl\\data\\aakindan");
    final File sourceFolder = new File(parentFolder, "src_split");
    final File targetFolder = new File(parentFolder, "src_split_tokenized");
    final File errorFolder = new File(parentFolder, "src_split_tokenization_error");
    final File[] files = sourceFolder.listFiles();
    Validate.notNull(files);

    final List<File> filesToTokenize = new ArrayList<File>();
    for (File file : files) {
        if (file.isDirectory())
            continue;

        filesToTokenize.add(file);
    }

    int lineCountOfAllFiles = 0;
    for (File file : filesToTokenize) {
        lineCountOfAllFiles += Utilities.lineCount(file);
    }

    System.out.println("Total lines in all files " + lineCountOfAllFiles);

    final StopWatch callbackStopWatch = new StopWatch();
    final TokenizationCommandCallback callback = new TokenizationCommandCallback(lineCountOfAllFiles,
            callbackStopWatch);

    int NUMBER_OF_THREADS = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS);

    callbackStopWatch.start();
    for (File sourceFile : filesToTokenize) {
        final String fileBaseName = sourceFile.getName().substring(0,
                sourceFile.getName().length() - ".txt.0000".length());
        final String index = FilenameUtils.getExtension(sourceFile.getName());
        final File targetFile = new File(targetFolder, fileBaseName + "_tokenized.txt." + index);
        final File errorFile = new File(errorFolder, fileBaseName + "_tokenization_error.txt." + index);

        pool.execute(
                new TokenizationCommand(callback, fastRelaxedTokenizer, sourceFile, targetFile, errorFile));
    }

    pool.shutdown();
    while (!pool.isTerminated()) {
        //            System.out.println("Waiting pool to be terminated!");
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    callbackStopWatch.stop();
    taskStopWatch.stop();
    System.out.println("Total time :" + taskStopWatch.toString());
    System.out.println("Nr of tokens : " + callback.getNumberOfTokens());
    System.out.println(
            "Avg time : " + (taskStopWatch.getTime() * 1.0d) / (callback.getNumberOfTokens() * 1.0d) + " ms");
}

From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java

@App("Creates tokenized files")
public void convertTokensToLines_Big_files_onSource() throws IOException, InterruptedException {
    final StopWatch taskStopWatch = new StopWatch();
    taskStopWatch.start();//  www. j av a  2s  .  c o  m

    final File parentFolder = new File("D:\\devl\\data\\aakindan");
    final File sourceFolder = new File(parentFolder, "src_split_tokenized");
    final File targetFolder = new File(parentFolder, "src_split_tokenized_lines");
    final File[] files = sourceFolder.listFiles();
    Validate.notNull(files);

    final List<File> filesToTokenize = new ArrayList<File>();
    for (File file : files) {
        if (file.isDirectory())
            continue;

        filesToTokenize.add(file);
    }

    final StopWatch callbackStopWatch = new StopWatch();

    int NUMBER_OF_THREADS = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS);

    callbackStopWatch.start();
    for (final File sourceFile : filesToTokenize) {
        final File targetFile = new File(targetFolder, sourceFile.getName());
        pool.execute(new Runnable() {
            @Override
            public void run() {
                System.out.println("Processing file " + sourceFile);
                BufferedWriter writer = null;
                try {
                    final List<String> lines = Files.readLines(sourceFile, Charsets.UTF_8);
                    writer = Files.newWriter(targetFile, Charsets.UTF_8);
                    for (String line : lines) {
                        final Iterable<String> tokens = Splitter.on(' ').omitEmptyStrings().trimResults()
                                .split(line);
                        for (String token : tokens) {
                            writer.write(token);
                            writer.write("\n");
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    if (writer != null)
                        try {
                            writer.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                }
            }
        });
    }

    pool.shutdown();
    while (!pool.isTerminated()) {
        //            System.out.println("Waiting pool to be terminated!");
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    callbackStopWatch.stop();
    taskStopWatch.stop();
    System.out.println("Total time :" + taskStopWatch.toString());
}

From source file:org.trnltk.apps.tokenizer.TextTokenizerCorpusApp.java

@App("Creates tokenized files")
public void findUniqueChars_Big_files_onSource() throws IOException, InterruptedException {
    final StopWatch taskStopWatch = new StopWatch();
    taskStopWatch.start();// w  ww.  ja va2  s  .  co  m

    final File parentFolder = new File("D:\\devl\\data\\aakindan");
    final File targetFile = new File(parentFolder, "chars_with_occurrence.txt");
    final File sourceFolder = new File(parentFolder, "src_split_tokenized_lines");
    final File[] files = sourceFolder.listFiles();
    Validate.notNull(files);

    final List<File> filesToInvestigate = new ArrayList<File>();
    for (File file : files) {
        if (file.isDirectory())
            continue;

        filesToInvestigate.add(file);
    }

    final StopWatch callbackStopWatch = new StopWatch();

    int NUMBER_OF_THREADS = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS);
    final boolean[] b = new boolean[65536 * 5];

    callbackStopWatch.start();
    for (final File sourceFile : filesToInvestigate) {
        pool.execute(new Runnable() {
            @Override
            public void run() {
                System.out.println("Processing file " + sourceFile);
                try {
                    final List<String> lines = Files.readLines(sourceFile, Charsets.UTF_8);
                    for (String token : lines) {
                        for (int i = 0; i < token.length(); i++) {
                            char aChar = token.charAt(i);
                            b[aChar] = true;
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }

    pool.shutdown();
    while (!pool.isTerminated()) {
        //            System.out.println("Waiting pool to be terminated!");
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    final BufferedWriter writer = Files.newWriter(targetFile, Charsets.UTF_8);
    for (int i = 0; i < b.length; i++) {
        boolean occurs = b[i];
        if (occurs) {
            writer.write((char) i);
            writer.write("\n");
        }
    }
    writer.close();

    callbackStopWatch.stop();
    taskStopWatch.stop();
    System.out.println("Total time :" + taskStopWatch.toString());
}

From source file:org.trnltk.apps.tokenizer.UniqueWordFinderApp.java

@App("Goes thru tokenized files, finds unique words")
public void findWordHistogram() throws InterruptedException {
    final StopWatch taskStopWatch = new StopWatch();
    taskStopWatch.start();/*from w w  w .j av  a 2  s.c o  m*/

    final File parentFolder = new File("D:\\devl\\data\\aakindan");
    final File sourceFolder = new File(parentFolder, "src_split_tokenized");
    final File[] files = sourceFolder.listFiles();
    Validate.notNull(files);

    final List<File> filesToRead = new ArrayList<File>();
    for (File file : files) {
        if (file.isDirectory())
            continue;

        filesToRead.add(file);
    }

    int NUMBER_OF_THREADS = 8;
    final ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(NUMBER_OF_THREADS);
    Map[] countMaps = new Map[NUMBER_OF_THREADS];
    for (int i = 0; i < countMaps.length; i++) {
        countMaps[i] = new HashMap(1000000);
    }

    for (int i = 0; i < filesToRead.size(); i++) {
        File file = filesToRead.get(i);
        //noinspection unchecked
        pool.execute(new HistogramCommand(countMaps[i % NUMBER_OF_THREADS], file));
    }

    pool.shutdown();
    while (!pool.isTerminated()) {
        //System.out.println("Waiting pool to be terminated!");
        pool.awaitTermination(3000, TimeUnit.MILLISECONDS);
    }

    System.out.println("Merging countMaps");
    final HashMap<String, Integer> mergeMap = new HashMap<String, Integer>(
            countMaps[0].size() * NUMBER_OF_THREADS); //approx
    for (Map<String, Integer> countMap : countMaps) {
        for (Map.Entry<String, Integer> stringIntegerEntry : countMap.entrySet()) {
            final String surface = stringIntegerEntry.getKey();
            final Integer newCount = stringIntegerEntry.getValue();
            final Integer existingCount = mergeMap.get(surface);
            if (existingCount == null)
                mergeMap.put(surface, newCount);
            else
                mergeMap.put(surface, existingCount + newCount);
        }
    }

    System.out.println("Sorting mergeMaps");
    final Map<String, Integer> sortedMergeMap = new TreeMap<String, Integer>(new Comparator<String>() {
        @Override
        public int compare(String a, String b) {
            Integer x = mergeMap.get(a);
            Integer y = mergeMap.get(b);
            if (x.equals(y)) {
                return a.compareTo(b);
            }
            return y.compareTo(x);
        }
    });

    sortedMergeMap.putAll(mergeMap);

    System.out.println("Writing to file");
    int numberOfTokens = 0;
    final File outputFile = new File(parentFolder, "wordHistogram.txt");
    BufferedWriter bufferedWriter = null;
    try {
        bufferedWriter = Files.newWriter(outputFile, Charsets.UTF_8);
        for (Map.Entry<String, Integer> entry : sortedMergeMap.entrySet()) {
            numberOfTokens += entry.getValue();
            bufferedWriter.write(entry.getKey() + " " + entry.getValue() + "\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (bufferedWriter != null)
            try {
                bufferedWriter.close();
            } catch (IOException e) {
                System.err.println("Unable to close file ");
                e.printStackTrace();
            }
    }

    taskStopWatch.stop();

    System.out.println("Total time :" + taskStopWatch.toString());
    System.out.println("Nr of tokens : " + numberOfTokens);
    System.out.println("Nr of unique tokens : " + sortedMergeMap.size());
}