net.anthonypoon.billscrapper.JavaBillScrapper.java Source code

Java tutorial

Introduction

Here is the source code for net.anthonypoon.billscrapper.JavaBillScrapper.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package net.anthonypoon.billscrapper;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import net.anthonypoon.billscrapper.database.DatabaseConnector;
import net.anthonypoon.billscrapper.database.DbWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Extracts the text of a PDF bill with PDFBox and routes each line to a bill
 * summary, phone summary or phone detail parser, producing a {@link Bill}.
 *
 * <p>Can be used as a library (construct with a PDF {@link File} and call
 * {@link #getBill()}) or from the command line via {@link #main(String[])}.
 * Command-line arguments that do not start with {@code -} are treated as PDF
 * file paths; {@code -v} dumps the parsers' contents after each file and
 * {@code -i} inserts each parsed bill into the database.
 *
 * @author anthony.poon
 */
public class JavaBillScrapper {
    /** Command-line switches recognised by {@link #main(String[])}. */
    private enum Flags {
        VERBOSE("-v"), INSERT_INTO_DB("-i");

        /** Literal command-line spelling of this flag. */
        private final String str;

        private Flags(String str) {
            this.str = str;
        }

        @Override
        public String toString() {
            return str;
        }

        /**
         * Resolves a command-line token to its flag, ignoring case.
         *
         * @param str the raw command-line token, e.g. {@code "-v"}
         * @return the matching flag
         * @throws IllegalArgumentException if no flag has that spelling
         */
        public static Flags fromString(String str) {
            for (Flags arg : Flags.values()) {
                if (str.equalsIgnoreCase(arg.toString())) {
                    return arg;
                }
            }
            throw new IllegalArgumentException("Illegal Flag:" + str);
        }

    }

    /** Splits extracted PDF text into lines, collapsing runs of CR/LF. */
    private static final Pattern LINE_BREAKS = Pattern.compile("[\\r\\n]+");
    /** Heading line after which text is routed to the phone summary parser. */
    private static final Pattern PHONE_SUMMARY_HEADING = Pattern.compile("(?i)^phone summary$");
    /** Heading line after which text is routed to the phone detail parser. */
    private static final Pattern PHONE_DETAILS_HEADING = Pattern.compile("(?i)^phone details$");

    /** PDF paths collected from the command line by {@link #main(String[])}. */
    private static final List<String> filePaths = new ArrayList<>();
    /** Flags collected from the command line; also consulted by {@link #parsePdf(String[])}. */
    private static final List<Flags> options = new ArrayList<>();

    /** Bill parsed from the PDF handed to the constructor. */
    private final Bill billObj;

    /**
     * Loads and parses the given PDF bill.
     *
     * @param pdfFile the PDF file to read
     * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
     */
    public JavaBillScrapper(File pdfFile) throws IOException {
        this.billObj = loadBill(pdfFile);
    }

    /**
     * Returns the bill parsed by the constructor.
     *
     * @return the parsed bill
     */
    public Bill getBill() {
        return billObj;
    }

    /**
     * Command-line entry point: parses every PDF path given in {@code args}, in
     * sorted order, and inserts each bill into the database when {@code -i} is set.
     *
     * <p>Unknown flags are reported on standard error and otherwise ignored. Any
     * other exception aborts the remaining files and its stack trace is printed
     * to standard output.
     *
     * @param args PDF file paths and flags ({@code -v}, {@code -i})
     */
    public static void main(String[] args) {
        try {
            for (String arg : args) {
                if (!arg.startsWith("-")) {
                    filePaths.add(arg);
                } else {
                    try {
                        options.add(Flags.fromString(arg));
                    } catch (IllegalArgumentException ex) {
                        System.err.println("Illegal options: " + arg);
                    }
                }
            }
            Collections.sort(filePaths);
            for (String filePath : filePaths) {
                System.out.println("Loading: " + filePath);
                // The document is opened and closed inside loadBill, so it is released
                // whether or not the database branch below runs.
                Bill bill = loadBill(new File(filePath));
                if (options.contains(Flags.INSERT_INTO_DB)) {
                    // NOTE(review): a new DatabaseConnector is created per file and nothing
                    // in this method closes it - confirm who owns the connection's lifetime.
                    DatabaseConnector db = new DatabaseConnector();
                    DbWriter writer = new DbWriter(db.getConnection());
                    boolean isInserted = writer.insertDetail(bill.getBillSummary(), bill.getPhoneSummaryData(),
                            bill.getPhoneDetail());
                    writer.commit();
                    if (!isInserted) {
                        System.out.println(filePath + " was not inserted into database.");
                    }
                }
            }

        } catch (Exception ex) {
            ex.printStackTrace(System.out);
        }
    }

    /**
     * Opens a PDF, extracts its text, splits it into lines and parses them.
     * The document is always closed, including when extraction or parsing fails.
     *
     * @param pdfFile the PDF file to read
     * @return the bill parsed from the file's text
     * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
     */
    private static Bill loadBill(File pdfFile) throws IOException {
        try (PDDocument doc = PDDocument.load(pdfFile)) {
            PDFTextStripper stripper = new PDFTextStripper();
            String rawText = stripper.getText(doc);
            return parsePdf(LINE_BREAKS.split(rawText));
        }
    }

    /**
     * Feeds each line to the currently active parser and assembles the results.
     *
     * <p>Parsing starts with the bill summary parser. A line is fed to the active
     * parser <em>before</em> it is tested as a section heading, so the heading
     * line itself goes to the previous section's parser and the switch takes
     * effect from the following line.
     *
     * @param tArray the lines of text extracted from the PDF
     * @return a bill populated with the data of all three parsers
     */
    private static Bill parsePdf(String[] tArray) {
        BillSummaryParser billSummaryParser = new BillSummaryParser();
        PhoneSummaryParser phoneSummaryParser = new PhoneSummaryParser();
        PhoneDetailParser phoneDetailParser = new PhoneDetailParser();
        BillParser currentParser = billSummaryParser;
        for (String text : tArray) {
            currentParser.feedText(text);
            if (PHONE_SUMMARY_HEADING.matcher(text).matches()) {
                currentParser = phoneSummaryParser;
            } else if (PHONE_DETAILS_HEADING.matcher(text).matches()) {
                currentParser = phoneDetailParser;
            }
        }
        Bill returnObj = new Bill();
        returnObj.setBillSummary(billSummaryParser.getData());
        returnObj.setPhoneSummaryData(phoneSummaryParser.getData());
        returnObj.setPhoneDetail(phoneDetailParser.getData());
        if (options.contains(Flags.VERBOSE)) {
            billSummaryParser.dump();
            phoneSummaryParser.dump();
            phoneDetailParser.dump();
        }
        return returnObj;
    }
}