org.loklak.DumpImporter.java Source code

Java tutorial

Introduction

Here is the source code for org.loklak.DumpImporter.java

Source

/**
 *  DumpImporter
 *  Copyright 06.01.2016 by Michael Peter Christen, @0rb1t3r
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *  
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *  
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package org.loklak;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.eclipse.jetty.util.log.Log;
import org.json.JSONObject;
import org.loklak.data.DAO;
import org.loklak.data.IndexEntry;
import org.loklak.objects.MessageEntry;
import org.loklak.objects.UserEntry;
import org.loklak.tools.storage.JsonFactory;
import org.loklak.tools.storage.JsonReader;
import org.loklak.tools.storage.JsonStreamReader;

public class DumpImporter extends Thread {

    /** flush the bulk index buffers once either of them exceeds this many entries */
    private static final int BULK_WRITE_LIMIT = 1500;

    // volatile: shutdown() and isBusy() are called from other threads, so writes
    // to these flags must be visible across threads
    private volatile boolean shallRun = true, isBusy = false;

    // maximum number of dump files to fetch per scan of the import directory
    private int count = Integer.MAX_VALUE;

    /**
     * @param count maximum number of dump files to fetch from the import
     *              directory per scan; only one file is processed per loop pass
     */
    public DumpImporter(int count) {
        this.count = count;
    }

    /**
     * ask the thread to shut down
     */
    public void shutdown() {
        this.shallRun = false;
        this.interrupt();
        Log.getLog().info("caught DumpImporter termination signal");
    }

    /**
     * @return true while a dump file import is in progress
     */
    public boolean isBusy() {
        return this.isBusy;
    }

    @Override
    public void run() {

        // work loop: each pass imports at most one dump file
        loop: while (this.shallRun)
            try {

                this.isBusy = false;

                // scan dump input directory to import files
                Collection<File> import_dumps = DAO.message_dump.getImportDumps(this.count);

                // check if we can do anything; if not, back off and re-check later
                if (import_dumps == null || import_dumps.isEmpty() || !DAO.wait_ready(Long.MAX_VALUE)) {
                    try {
                        Thread.sleep(10000);
                    } catch (InterruptedException e) {
                        // interrupted by shutdown(); shallRun is re-checked by the loop condition
                    }
                    continue loop;
                }
                this.isBusy = true;

                // take only one file and process this file
                File import_dump = import_dumps.iterator().next();
                final JsonReader dumpReader = DAO.message_dump.getDumpReader(import_dump);
                final AtomicLong newTweets = new AtomicLong(0);
                Log.getLog().info("started import of dump file " + import_dump.getAbsolutePath());

                // we start concurrent indexing threads to process the json objects
                Thread[] indexerThreads = new Thread[dumpReader.getConcurrency()];
                for (int i = 0; i < dumpReader.getConcurrency(); i++) {
                    indexerThreads[i] = new Thread() {
                        @Override
                        public void run() {
                            JsonFactory tweet;
                            try {
                                // buffer entries and write them in bulk to reduce index round trips
                                List<IndexEntry<UserEntry>> userBulk = new ArrayList<>();
                                List<IndexEntry<MessageEntry>> messageBulk = new ArrayList<>();
                                while ((tweet = dumpReader.take()) != JsonStreamReader.POISON_JSON_MAP) {
                                    try {
                                        JSONObject json = tweet.getJSON();
                                        JSONObject user = (JSONObject) json.remove("user");
                                        // skip records without an embedded user object
                                        if (user == null)
                                            continue;
                                        UserEntry u = new UserEntry(user);
                                        MessageEntry t = new MessageEntry(json);
                                        // record user and message into the search index buffers
                                        userBulk.add(
                                                new IndexEntry<UserEntry>(u.getScreenName(), t.getSourceType(), u));
                                        messageBulk.add(
                                                new IndexEntry<MessageEntry>(t.getIdStr(), t.getSourceType(), t));
                                        if (userBulk.size() > BULK_WRITE_LIMIT || messageBulk.size() > BULK_WRITE_LIMIT) {
                                            DAO.users.writeEntries(userBulk);
                                            DAO.messages.writeEntries(messageBulk);
                                            userBulk.clear();
                                            messageBulk.clear();
                                        }
                                        newTweets.incrementAndGet();
                                    } catch (IOException e) {
                                        Log.getLog().warn(e);
                                    }
                                    // throttle while the queued-indexing subsystem is under load
                                    if (LoklakServer.queuedIndexing.isBusy())
                                        try {
                                            Thread.sleep(200);
                                        } catch (InterruptedException e) {
                                            // preserve interrupt status so the blocking take() above can see it
                                            Thread.currentThread().interrupt();
                                        }
                                }
                                // flush the remaining buffered entries
                                try {
                                    DAO.users.writeEntries(userBulk);
                                    DAO.messages.writeEntries(messageBulk);
                                } catch (IOException e) {
                                    Log.getLog().warn(e);
                                }
                            } catch (InterruptedException e) {
                                Log.getLog().warn(e);
                            }
                        }
                    };
                    indexerThreads[i].start();
                }

                // wait for termination of the indexing threads and do logging meanwhile
                boolean running = true;
                while (running) {
                    long startTime = System.currentTimeMillis();
                    long startCount = newTweets.get();
                    running = false;
                    for (int i = 0; i < dumpReader.getConcurrency(); i++) {
                        if (indexerThreads[i].isAlive())
                            running = true;
                    }
                    try {
                        Thread.sleep(10000);
                    } catch (InterruptedException e) {
                        // interrupted by shutdown(); keep monitoring until the indexers finish
                    }
                    // guard against division by zero when the sleep returned immediately
                    long runtime = Math.max(1, System.currentTimeMillis() - startTime);
                    long count = newTweets.get() - startCount;
                    Log.getLog().info("imported " + newTweets.get() + " tweets at " + (count * 1000 / runtime)
                            + " tweets per second from " + import_dump.getName());
                }

                // catch up the number of processed tweets
                Log.getLog().info("finished import of dump file " + import_dump.getAbsolutePath() + ", "
                        + newTweets.get() + " new tweets");

                // shift the dump file to prevent that it is imported again
                DAO.message_dump.shiftProcessedDump(import_dump.getName());
                this.isBusy = false;

            } catch (Throwable e) {
                // boundary catch: keep the importer alive even after unexpected errors
                Log.getLog().warn("DumpImporter THREAD", e);
                try {
                    Thread.sleep(10000);
                } catch (InterruptedException e1) {
                    // interrupted by shutdown(); loop condition handles termination
                }
            }

        Log.getLog().info("DumpImporter terminated");
    }

}