de.fatalix.book.importer.BookMigrator.java Source code

Java tutorial

Introduction

Here is the source code for de.fatalix.book.importer.BookMigrator.java

Source

/*
 * Copyright (c) 2015 Felix Husse under MIT License
 * see LICENSE file
 */
package de.fatalix.book.importer;

import com.google.gson.Gson;
import de.fatalix.bookery.solr.model.BookEntry;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

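/**
 * Static helpers for moving e-books between the file system and a Solr index:
 * locating book folders, importing them, exporting them again and sorting out
 * folders whose book lacks a cover or a description.
 *
 * @author felix.husse
 */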
public class BookMigrator {

    // Character class matching any single one of: % . " * / : < > ? | + , ; = [ ] or a backslash.
    // NOTE(review): not referenced by any method visible in this class - confirm whether it is still needed.
    private static final Pattern PATTERN = Pattern.compile("[%\\.\"\\*/:<>\\?\\\\\\|\\+,\\.;=\\[\\]]");

    /**
     * Deletes every document of the given Solr core and commits the deletion.
     *
     * @param solrURL  URL passed to {@code SolrHandler.createConnection}
     * @param solrCore core name passed to {@code SolrHandler.createConnection}
     * @throws SolrServerException propagated from the Solr client calls
     * @throws IOException         propagated from the Solr client calls
     */
    public static void clearDB(String solrURL, String solrCore) throws SolrServerException, IOException {
        final SolrServer solr = SolrHandler.createConnection(solrURL, solrCore);
        System.out.println("RESET:");
        // "*:*" is the query used here to address all documents of the core.
        final String everything = "*:*";
        solr.deleteByQuery(everything);
        solr.commit();
    }

    /**
     * Collects all book folders found below the given import path.
     *
     * @param importPath path of the folder to scan
     * @return the folders reported by {@code walkTree}
     * @throws IOException if {@code importPath} does not denote a directory
     */
    public static List<File> findAllBooks(String importPath) throws IOException {
        final File root = new File(importPath);
        if (root.isDirectory()) {
            return walkTree(new ArrayList<File>(), root);
        }
        throw new IOException(root.getAbsolutePath() + " is not a folder!");
    }

    /**
     * Recursively collects book folders. A folder that directly contains a book
     * file is added to the result and not descended into; any other folder is
     * searched through its sub-folders.
     *
     * @param result        list the found folders are appended to
     * @param currentFolder folder to inspect
     * @return the same list instance that was passed in as {@code result}
     */
    private static List<File> walkTree(List<File> result, File currentFolder) {
        if (hasFolderBook(currentFolder)) {
            result.add(currentFolder);
            return result;
        }
        File[] subFolders = currentFolder.listFiles(new FileFilter() {

            @Override
            public boolean accept(File file) {
                return file.isDirectory();
            }
        });
        // listFiles() returns null when the path is not a directory or cannot be
        // read; such a folder simply contributes nothing instead of aborting the scan.
        if (subFolders == null) {
            return result;
        }
        for (File subFolder : subFolders) {
            walkTree(result, subFolder);
        }
        return result;
    }

    /**
     * Tells whether the folder directly contains at least one file whose name
     * contains ".epub" or ".mobi".
     *
     * @param folder folder to inspect
     * @return {@code true} if such a file exists; {@code false} otherwise,
     *         including when the folder cannot be listed
     */
    private static boolean hasFolderBook(File folder) {
        File[] books = folder.listFiles(new FileFilter() {

            @Override
            public boolean accept(File file) {
                return file.getName().contains(".epub") || file.getName().contains(".mobi");
            }
        });
        // null means "not a directory" or "I/O error", not "no matches".
        return books != null && books.length > 0;
    }

    /**
     * Sorts out incomplete books. Every folder is read with {@code importBatchWise};
     * a book without cover is moved below {@code filteredFolder/cover}, a book that
     * has a cover but no description is moved below {@code filteredFolder/description}.
     * Progress is printed to standard output.
     *
     * @param bookFolders    folders to inspect, one book per folder
     * @param filteredFolder parent folder receiving the "cover" and "description" sub-folders
     * @throws IOException propagated from reading a book folder; a failed move is
     *                     reported and skipped rather than thrown
     */
    public static void filterBooks(List<File> bookFolders, File filteredFolder) throws IOException {
        int total = bookFolders.size();
        int counter = 0;
        int filteredCounter = 0;
        int percentageDone = 0;
        Gson gson = new Gson();
        File coverFilterFolder = new File(filteredFolder, "cover");
        File descriptionFilterFolder = new File(filteredFolder, "description");
        for (File bookFolder : bookFolders) {
            BookEntry bookEntry = importBatchWise(bookFolder, gson);

            // A missing cover takes precedence over a missing description.
            File targetParent = null;
            if (bookEntry.getCover() == null) {
                targetParent = coverFilterFolder;
            } else if (bookEntry.getDescription() == null) {
                targetParent = descriptionFilterFolder;
            }

            if (targetParent != null) {
                filteredCounter++;
                moveToFilterFolder(bookFolder, bookEntry, targetParent);
                System.out.println("Filtered " + filteredCounter + " of " + total);
            }

            counter++;

            // long arithmetic so that counter * 100 cannot overflow for huge lists
            int currentProgress = (int) (counter * 100L / total);
            if (currentProgress > percentageDone) {
                percentageDone = currentProgress;
                System.out.println(percentageDone + "% done..");
            }
        }
        System.out.println("Finished processing");
    }

    /**
     * Moves a book folder into {@code targetParent}, named "author-title" with
     * invalid file name characters replaced. Best effort: a failed move is
     * reported with source, destination and cause, then processing continues.
     */
    private static void moveToFilterFolder(File bookFolder, BookEntry bookEntry, File targetParent) {
        String validFolderName = toValidFileName(bookEntry.getAuthor() + "-" + bookEntry.getTitle());
        File destination = new File(targetParent, validFolderName);
        try {
            FileUtils.moveDirectory(bookFolder, destination);
        } catch (IOException ex) {
            System.out.println("Could not move " + bookFolder.getAbsolutePath() + " to "
                    + destination.getAbsolutePath() + ": " + ex);
        }
    }

    /**
     * Replaces every character that is one of {@code : / * " ? | < > '} or a
     * backslash with a single space; all other characters are kept.
     *
     * @param input text to sanitize
     * @return the sanitized text, same length as {@code input}
     */
    private static String toValidFileName(String input) {
        final String forbidden = ":\\/*\"?|<>'";
        final StringBuilder sanitized = new StringBuilder(input.length());
        for (int i = 0; i < input.length(); i++) {
            final char current = input.charAt(i);
            sanitized.append(forbidden.indexOf(current) >= 0 ? ' ' : current);
        }
        return sanitized.toString();
    }

    /**
     * Reads the given book folders and adds the resulting entries to a Solr core
     * in batches, printing the progress after every upload.
     *
     * @param solrURL     URL passed to {@code SolrHandler.createConnection}
     * @param solrCore    core name passed to {@code SolrHandler.createConnection}
     * @param batchSize   number of collected entries that triggers an upload
     * @param bookFolders folders to read, one book entry per folder
     * @param reset       when {@code true}, all documents of the core are deleted first
     * @throws IOException         propagated from reading the folders or from the Solr client
     * @throws SolrServerException if an update response reports a non-zero status,
     *                             or propagated from the Solr client
     */
    public static void importBooks(String solrURL, String solrCore, int batchSize, List<File> bookFolders,
            boolean reset) throws IOException, SolrServerException {
        final SolrServer solr = SolrHandler.createConnection(solrURL, solrCore);
        if (reset) {
            System.out.println("RESET:");
            solr.deleteByQuery("*:*");
            solr.commit();
        }
        System.out.println("Connection established");

        final Gson gson = new Gson();
        final int total = bookFolders.size();
        final List<BookEntry> pending = new ArrayList<>();
        int processed = 0;
        for (File bookFolder : bookFolders) {
            pending.add(importBatchWise(bookFolder, gson));
            processed++;
            if (pending.size() < batchSize) {
                continue;
            }
            uploadPending(solr, pending, processed, total);
        }
        // Whatever did not fill a complete batch is sent at the end.
        if (!pending.isEmpty()) {
            uploadPending(solr, pending, processed, total);
        }
    }

    /**
     * Uploads the pending entries, fails on a non-zero response status, then
     * empties the list and prints the progress.
     */
    private static void uploadPending(SolrServer solr, List<BookEntry> pending, int processed, int total)
            throws IOException, SolrServerException {
        final UpdateResponse response = SolrHandler.addBeans(solr, pending);
        final int status = response.getStatus();
        if (status != 0) {
            throw new SolrServerException("Update failed with CODE " + response.getStatus());
        }
        pending.clear();
        System.out.println("Processed " + processed + " of " + total);
    }

    /**
     * Exports the books of a Solr core into an existing folder, fetching them
     * in batches starting at offset 0.
     *
     * @param solrURL    URL passed to {@code SolrHandler.createConnection}
     * @param solrCore   core name passed to {@code SolrHandler.createConnection}
     * @param batchSize  batch size handed to the batch-wise export
     * @param exportPath path of the folder to export into
     * @throws SolrServerException propagated from the export
     * @throws IOException         if {@code exportPath} does not denote a directory,
     *                             or propagated from the export
     */
    public static void exportBooks(String solrURL, String solrCore, int batchSize, String exportPath)
            throws SolrServerException, IOException {
        final File targetFolder = new File(exportPath);
        if (targetFolder.isDirectory()) {
            final SolrServer solr = SolrHandler.createConnection(solrURL, solrCore);
            System.out.println("Connection established");
            exportBatchWise(solr, targetFolder, batchSize, 0, new Gson());
            return;
        }
        throw new IOException(targetFolder.getAbsolutePath() + " is not a folder!");
    }

    /**
     * Builds a book entry from the files of one book folder. Files are picked
     * by name: ".mobi" and ".epub" provide the book data, ".jpg" the cover,
     * ".json" previously exported meta data and ".opf" meta data parsed by
     * {@code parseOPF}.
     *
     * @param bookFolder folder holding the files of a single book
     * @param gson       Gson instance used to read the ".json" meta data
     * @return the populated entry; fields without a matching file stay unset
     * @throws IOException if the folder cannot be listed or a file cannot be read
     */
    private static BookEntry importBatchWise(File bookFolder, Gson gson) throws IOException {
        File[] files = bookFolder.listFiles();
        if (files == null) {
            // listFiles() returns null for non-directories and on I/O errors.
            throw new IOException(bookFolder.getAbsolutePath() + " cannot be listed");
        }

        BookEntry bookEntry = new BookEntry();
        for (File file : files) {
            String fileName = file.getName();
            if (fileName.contains(".mobi")) {
                bookEntry.setMobi(Files.readAllBytes(file.toPath()));
            } else if (fileName.contains(".jpg")) {
                bookEntry.setCover(Files.readAllBytes(file.toPath()));
            } else if (fileName.contains(".epub")) {
                bookEntry.setEpub(Files.readAllBytes(file.toPath()));
            } else if (fileName.contains(".json")) {
                String json;
                // try-with-resources: the stream was previously left open.
                try (FileInputStream in = new FileInputStream(file)) {
                    json = IOUtils.toString(in, Charset.defaultCharset());
                }
                BookMetaData bmd = gson.fromJson(json, BookMetaData.class);
                // Gson yields null for empty input; nothing to copy in that case.
                if (bmd != null) {
                    bookEntry.setAuthor(bmd.getAuthor()).setTitle(bmd.getTitle()).setIsbn(bmd.getIsbn())
                            .setPublisher(bmd.getPublisher()).setDescription(bmd.getDescription())
                            .setLanguage(bmd.getLanguage()).setMimeType(bmd.getMimeType())
                            .setUploadDate(bmd.getUploadDate()).setReleaseDate(bmd.getReleaseDate());
                }
            } else if (fileName.contains(".opf")) {
                bookEntry = parseOPF(file, bookEntry);

                bookEntry.setMimeType("mobi").setUploadDate(new DateTime(DateTimeZone.UTC).toDate());
            }
        }

        return bookEntry;
    }

    /**
     * Exports all books of the index page by page. Every book gets its own
     * folder "author-title" below {@code exportFolder}; for books that have a
     * cover, the epub/mobi data (when present), the cover and a JSON meta data
     * file are written into it. Existing files are never overwritten
     * ({@code CREATE_NEW}).
     *
     * @param server       Solr connection to query
     * @param exportFolder folder receiving one sub-folder per book
     * @param batchSize    number of books requested per query
     * @param offset       index of the first book to export
     * @param gson         Gson instance used to serialize the meta data
     * @throws SolrServerException propagated from the index query
     * @throws IOException         if a file cannot be written, e.g. because it already exists
     */
    private static void exportBatchWise(SolrServer server, File exportFolder, int batchSize, int offset, Gson gson)
            throws SolrServerException, IOException {
        int currentOffset = offset;
        // Iterative paging: no stack growth for large indexes and no trailing empty query.
        while (true) {
            QueryResponse response = SolrHandler.searchSolrIndex(server, "*:*", batchSize, currentOffset);
            List<BookEntry> bookEntries = response.getBeans(BookEntry.class);
            long numFound = response.getResults().getNumFound();
            System.out.println("Retrieved " + (bookEntries.size() + currentOffset) + " of " + numFound);

            for (BookEntry bookEntry : bookEntries) {
                String bookTitle = bookEntry.getTitle().replaceAll(":", " ");
                String baseName = bookEntry.getAuthor() + "-" + bookTitle;
                File bookFolder = new File(exportFolder, baseName);
                bookFolder.mkdirs();
                if (bookEntry.getCover() == null) {
                    // Books without cover only get their (empty) folder.
                    continue;
                }
                if (bookEntry.getEpub() != null) {
                    File bookData = new File(bookFolder, baseName + ".epub");
                    // The epub file must carry the epub bytes (it used to receive the mobi bytes).
                    Files.write(bookData.toPath(), bookEntry.getEpub(), StandardOpenOption.CREATE_NEW);
                }
                if (bookEntry.getMobi() != null) {
                    File bookData = new File(bookFolder, baseName + ".mobi");
                    Files.write(bookData.toPath(), bookEntry.getMobi(), StandardOpenOption.CREATE_NEW);
                }
                File coverData = new File(bookFolder, baseName + ".jpg");
                Files.write(coverData.toPath(), bookEntry.getCover(), StandardOpenOption.CREATE_NEW);

                File metaDataFile = new File(bookFolder, baseName + ".json");
                BookMetaData metaData = new BookMetaData(bookEntry.getAuthor(), bookEntry.getTitle(),
                        bookEntry.getIsbn(), bookEntry.getPublisher(), bookEntry.getDescription(),
                        bookEntry.getLanguage(), bookEntry.getReleaseDate(), bookEntry.getMimeType(),
                        bookEntry.getUploadDate(), bookEntry.getViewed(), bookEntry.getShared());
                // Platform default charset, as before (String.getBytes() without argument).
                byte[] json = gson.toJson(metaData).getBytes(Charset.defaultCharset());
                Files.write(metaDataFile.toPath(), json, StandardOpenOption.CREATE_NEW);
            }

            if (bookEntries.isEmpty()) {
                // An empty page can never make progress; stop instead of querying forever.
                return;
            }
            currentOffset += batchSize;
            if (currentOffset >= numFound) {
                return;
            }
        }
    }

    /**
     * Fills a book entry from an OPF file by scanning it line by line for the
     * elements dc:title, dc:creator, dc:description, dc:publisher, dc:date,
     * dc:language and the identifier with {@code opf:scheme="ISBN"}. This is a
     * line-oriented scan, not an XML parser: apart from the description, which
     * may span several lines, a value is expected on the line of its opening tag.
     * Lines that carry only a closing or self-closing tag are ignored.
     *
     * @param pathToOPF the OPF file, read as UTF-8
     * @param bmd       entry to populate
     * @return the same entry instance that was passed in
     * @throws IOException if the file cannot be read
     */
    private static BookEntry parseOPF(File pathToOPF, BookEntry bmd) throws IOException {
        List<String> lines = Files.readAllLines(pathToOPF.toPath(), Charset.forName("UTF-8"));
        // Non-null while the lines of a multi-line dc:description are being collected.
        StringBuilder pendingDescription = null;
        for (String line : lines) {
            if (pendingDescription != null) {
                int tagStart = line.indexOf('<');
                String part = tagStart < 0 ? line : line.substring(0, tagStart);
                // Keep the line break between non-empty parts so words are not glued together.
                if (pendingDescription.length() > 0 && !part.isEmpty()) {
                    pendingDescription.append('\n');
                }
                pendingDescription.append(part);
                if (tagStart >= 0) {
                    // First tag on a continuation line ends the description.
                    bmd.setDescription(StringEscapeUtils.unescapeXml(pendingDescription.toString()));
                    pendingDescription = null;
                }
                continue;
            }

            String rest = textAfterOpeningTag(line);
            if (rest == null) {
                // No opening tag with content on this line; nothing to extract.
                continue;
            }

            if (line.contains("dc:title")) {
                bmd.setTitle(StringEscapeUtils.unescapeXml(upToNextTag(rest)));
            } else if (line.contains("dc:creator")) {
                bmd.setAuthor(StringEscapeUtils.unescapeXml(upToNextTag(rest)));
            } else if (line.contains("dc:description")) {
                if (rest.indexOf('<') < 0) {
                    // Closing tag is not on this line: the description continues below.
                    pendingDescription = new StringBuilder(rest);
                } else {
                    bmd.setDescription(StringEscapeUtils.unescapeXml(upToNextTag(rest)));
                }
            } else if (line.contains("dc:publisher")) {
                bmd.setPublisher(StringEscapeUtils.unescapeXml(upToNextTag(rest)));
            } else if (line.contains("dc:date")) {
                String value = upToNextTag(rest);
                try {
                    DateTime dtReleaseDate = new DateTime(value, DateTimeZone.UTC);
                    // NOTE(review): year 101 is presumably a placeholder for "unknown date"
                    // written by the tool that produced the OPF - confirm.
                    if (dtReleaseDate.getYear() != 101) {
                        bmd.setReleaseDate(dtReleaseDate.toDate());
                    }
                } catch (IllegalArgumentException ex) {
                    // A malformed date should not abort the import of the whole book.
                    System.out.println("Ignoring unparseable dc:date '" + value + "' in "
                            + pathToOPF.getAbsolutePath());
                }
            } else if (line.contains("dc:language")) {
                bmd.setLanguage(StringEscapeUtils.unescapeXml(upToNextTag(rest)));
            } else if (line.contains("opf:scheme=\"ISBN\"")) {
                bmd.setIsbn(StringEscapeUtils.unescapeXml(upToNextTag(rest)));
            }
        }
        return bmd;
    }

    /**
     * Returns the text following the first tag of the line, or {@code null} when
     * the line has no complete tag or its first tag is a closing or self-closing one.
     */
    private static String textAfterOpeningTag(String line) {
        int open = line.indexOf('<');
        if (open < 0) {
            return null;
        }
        int close = line.indexOf('>', open + 1);
        if (close < 0) {
            return null;
        }
        if (line.charAt(open + 1) == '/' || line.charAt(close - 1) == '/') {
            return null;
        }
        return line.substring(close + 1);
    }

    /** Returns the text up to, but excluding, the first '<'; the whole text if there is none. */
    private static String upToNextTag(String text) {
        int tagStart = text.indexOf('<');
        return tagStart < 0 ? text : text.substring(0, tagStart);
    }

}