Example usage for org.apache.pdfbox.pdmodel PDDocument close

Introduction

This page shows example usages of the close() method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

@Override
public void close() throws IOException

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:pdfcompressor.PDFCompressor.java

/**
 * Builds a preview image for one page of {@code sourceDocument}.
 *
 * <p>The page is pulled into a temporary document with {@code PageExtractor}, rendered at
 * {@code dpi}, passed through {@code getImageByteArray} with {@code compressRate}, and the
 * resulting bytes are decoded back into a {@link BufferedImage}.
 *
 * @param pageNum page number handed to {@code PageExtractor} as the start of the range
 *                ({@code pageNum + 1} is passed as the end)
 * @return the decoded preview image, as returned by {@link ImageIO#read}
 * @throws IOException if extraction, rendering or decoding fails
 */
public BufferedImage getBytePreview(int pageNum) throws IOException {
    PageExtractor extractor = new PageExtractor(sourceDocument, pageNum, pageNum + 1);
    PDDocument extractedDoc = extractor.extract();
    try {
        PDPage page = (PDPage) extractedDoc.getDocumentCatalog().getAllPages().get(0);
        // Render while the temporary document is still open: the page belongs to
        // extractedDoc, so converting it after close() would read from a closed document.
        byte[] previewBytes = getImageByteArray(page.convertToImage(BufferedImage.TYPE_INT_BGR, dpi),
                compressRate);
        return ImageIO.read(new ByteArrayInputStream(previewBytes));
    } finally {
        // Always release the temporary document, including when rendering throws.
        extractedDoc.close();
    }
}

From source file:pdfconverter.converter3.java

/**
 * Copies a fixed-layout, 11-column table out of a PDF into the "Page" sheet of the workbook
 * written to {@code output}, then folds continuation lines into the row above them.
 *
 * <p>The first {@code .pdf} file returned by listing {@code path} is used. Pages with index
 * 800 up to (but excluding) 1491 are scanned, limited to the pages the document actually has.
 * On every page 80 rows of height 9 are read, starting at y = 116.
 *
 * @param args the command line arguments (unused)
 * @throws IOException    if no PDF is found, or reading the PDF / writing the workbook fails
 * @throws WriteException if a cell cannot be written to the workbook
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException, WriteException {
    workbook = Workbook.createWorkbook(new File(output));
    System.out.println("File created");
    WritableSheet sheet = workbook.createSheet("Page", 0);
    ExcelStart(sheet);

    File dir = new File(path);
    File[] dirList = dir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".pdf");
        }
    });
    // listFiles() returns null when the path is not a readable directory.
    if (dirList == null || dirList.length == 0) {
        throw new IOException("No .pdf file found in " + dir.getPath());
    }

    // Left edge and width of each table column, left to right.
    final float[] columnX = { 0, 80, 110, 150, 270, 390, 430, 476, 558, 623, 689 };
    final float[] columnWidth = { 80, 30, 40, 120, 120, 40, 46, 82, 65, 66, 100 };

    final int firstPage = 800;
    final int pageStop = 1491;
    final int rowsPerPage = 80;
    final int firstRowY = 116;
    final int rowHeight = 9;

    PDFTextStripperByArea areaSearch = new PDFTextStripperByArea();
    int sheetRowCount = 0;

    PDDocument pd = PDDocument.load(dirList[0]);
    try {
        // Never ask for a page the document does not have.
        int lastPage = Math.min(pageStop, pd.getNumberOfPages());

        for (int curpage = firstPage; curpage < lastPage; curpage++) {
            PDPage page = pd.getPage(curpage);
            System.out.println("Now parsing page " + curpage);

            int curHeight = firstRowY;
            for (int curRow = 0; curRow < rowsPerPage; curRow++) {
                // Register every column of this row, then parse the page once for all of
                // them instead of once per cell.
                String[] regionNames = new String[columnX.length];
                for (int col = 0; col < columnX.length; col++) {
                    regionNames[col] = "cell-" + (col + 1) + "-" + curRow;
                    areaSearch.addRegion(regionNames[col],
                            new Rectangle2D.Float(columnX[col], curHeight, columnWidth[col], rowHeight));
                }
                areaSearch.extractRegions(page);

                // Sheet row 0 is left alone here; extracted rows start at sheet row 1.
                for (int col = 0; col < columnX.length; col++) {
                    AddCell(sheet, areaSearch.getTextForRegion(regionNames[col]), col, sheetRowCount + 1);
                    areaSearch.removeRegion(regionNames[col]);
                }

                sheetRowCount++;
                curHeight += rowHeight;
            }
        }
    } finally {
        // Release the PDF even if extraction or a cell write fails.
        pd.close();
    }

    System.out.println("Data extracted to Excel, parsing through Excel data...");

    // A row whose first column holds fewer than 5 characters is treated as a continuation
    // line: its column-4 text is appended to column 4 of the row above and the row is removed.
    // Removing a row shifts the following rows up, so the scan repeats until a full pass
    // finds nothing left to merge.
    boolean multiline = true;
    while (multiline) {
        multiline = false;
        // Start at row 1: row 0 has no row above it to merge into.
        for (int row = 1; row < sheet.getRows(); row++) {
            Cell cell = sheet.getCell(0, row);
            if (cell.getContents().length() < 5) {
                multiline = true;
                WritableCell previous = sheet.getWritableCell(4, row - 1);
                WritableCell current = sheet.getWritableCell(4, row);
                String content = previous.getContents() + current.getContents();
                content = content.replace("\n", "").replace("\r", "");
                Label label = (Label) previous;
                label.setString(content);
                sheet.removeRow(row);
            }
        }
    }

    System.out.println("Data extraction complete");
    workbook.write();
    workbook.close();
}

From source file:pdfedittest.PDFEditTest.java

/**
 * Dumps the text of a PDF at a hard-coded path into {@code rayani.txt}, then runs
 * {@code TextParser} on that text file.
 *
 * <p>Prints the page count and the encryption flag of the document before extracting.
 * Any exception is caught and its stack trace printed.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    String outputPath = "rayani.txt";
    try {
        File input = new File("C:\\Users\\Administrator\\Desktop\\FA Feb 16.pdf"); // PDF to extract from
        File output = new File(outputPath); // text file receiving the extracted data

        PDDocument pd = PDDocument.load(input);
        try {
            System.out.println(pd.getNumberOfPages());
            System.out.println(pd.isEncrypted());

            PDFTextStripper stripper = new PDFTextStripper();
            BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)));
            try {
                stripper.writeText(pd, wr);
            } finally {
                // close() also flushes; doing it in finally keeps the file handle from
                // leaking when writeText throws.
                wr.close();
            }
        } finally {
            pd.close();
        }

        // The text file is complete and closed at this point, so it is safe to parse.
        TextParser a = new TextParser(outputPath);
        a.getUserDetail("");
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:pdfpositional.PdfPositional.java

/**
 * Command line entry point: processes the content streams of a PDF and writes the result as
 * JSON through {@code PdfPositional}.
 *
 * <p>The last argument is the PDF file. Earlier arguments of the form {@code --name=value}
 * are options; {@code --page} selects a single 1-based page and {@code --output} sets the
 * output file. Without {@code --page} every page is processed.
 *
 * <p>The process exits with status 0 on success and 1 after printing a message on a
 * parameter error, an encrypted document that cannot be opened with an empty password, or
 * any other I/O or number-format error.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        // check file param
        if (args.length == 0) {
            throw new ParameterException("No file parameter specified");
        }

        String file = args[args.length - 1];
        Pattern patternFile = Pattern.compile("(?i)^[\\w,\\s-()/]+\\.pdf$");
        Matcher matcherFile = patternFile.matcher(file);

        // check file is valid format
        if (!matcherFile.find()) {
            throw new ParameterException("File parameter invalid: " + file);
        }

        // check if file exists
        File input = new File(file);
        if (!input.exists()) {
            throw new ParameterException("File does not exist: " + file);
        }

        // ensure it isnt a directory
        if (input.isDirectory()) {
            throw new ParameterException("File is a directory: " + file);
        }

        PdfPositional pdfPositional = new PdfPositional(input);
        // Same value as the former new Float(1.388888888889), without the deprecated constructor.
        pdfPositional.setConversion(Float.valueOf((float) 1.388888888889));

        pdfPositional.processFileArgument(args[args.length - 1]);
        Pattern patternArgument = Pattern.compile("^-{2}([^=]+)[=]([\\s\\S]+)$");

        // every argument before the file name is an option
        for (int i = 0; i < args.length - 1; i++) {
            Matcher matcher = patternArgument.matcher(args[i]);
            while (matcher.find()) {
                switch (matcher.group(1)) {
                case "page":
                    pdfPositional.setPageNumber(Integer.parseInt(matcher.group(2)));
                    break;
                case "output":
                    pdfPositional.setOutputFile(matcher.group(2));
                    break;
                }
            }
        }

        PDDocument document = PDDocument.load(pdfPositional.getInputFile());
        try {
            // check for encrypted document
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (CryptographyException | IOException e) {
                    throw new EncryptedDocumentException();
                }
            }

            List allPages = document.getDocumentCatalog().getAllPages();
            if (pdfPositional.hasPageNumber()) {
                int pageNumber = pdfPositional.getPageNumber();
                // Page numbers are 1-based; reject 0 and negatives as well as numbers past
                // the end, otherwise the list lookup below fails with an uncaught exception.
                if (pageNumber < 1 || document.getNumberOfPages() < pageNumber) {
                    throw new ParameterException("illegal page number");
                }
                PDPage page = (PDPage) allPages.get(pageNumber - 1);
                PDStream contents = page.getContents();
                if (contents != null) {
                    pdfPositional.processStream(page, page.findResources(), contents.getStream());
                    pdfPositional.addPageDataToPdfData();
                    pdfPositional.writeJSONToOutputStream();
                }
            } else {
                for (int i = 0; i < allPages.size(); i++) {
                    pdfPositional.setPageNumber(i + 1);
                    PDPage page = (PDPage) allPages.get(i);
                    PDStream contents = page.getContents();

                    if (contents != null) {
                        pdfPositional.processStream(page, page.findResources(), contents.getStream());
                        pdfPositional.addPageDataToPdfData();
                        pdfPositional.writeJSONToOutputStream();
                    }

                    page.clear();
                }
            }

            pdfPositional.destroyOutputStream();
        } finally {
            // Runs before control reaches the catch blocks below, so the document is closed
            // on error paths too (System.exit in a catch would skip an outer finally).
            document.close();
        }

        System.exit(0);
    } catch (ParameterException ex) {
        System.out.println("Parameter Error: " + ex.getMessage());
        System.exit(1);
    } catch (EncryptedDocumentException ex) {
        System.out.println("Encrypted Document Error");
        System.exit(1);
    } catch (IOException | NumberFormatException ex) {
        System.out.println("General Error");
        System.exit(1);
    }

}

From source file:pdfreader.ColorScheme.java

/**
 * Prints the stroking colour left in the graphics state after processing the first page of
 * {@code D://My.pdf}.
 *
 * <p>The colour space name is printed first, followed by each colour component multiplied
 * by 255, one per line. The document is closed before the method returns or throws.
 *
 * @throws IOException if the document or the operator properties cannot be loaded, or the
 *                     page stream cannot be processed
 */
public void getColor() throws IOException {
    // If load() throws there is nothing to close, so the guard starts right after it.
    final PDDocument doc = PDDocument.load("D://My.pdf");
    try {
        final PDFStreamEngine engine = new PDFStreamEngine(
                ResourceLoader.loadProperties("org//apache//pdfbox//resources//PageDrawer.properties", true));
        final PDPage firstPage = (PDPage) doc.getDocumentCatalog().getAllPages().get(0);
        engine.processStream(firstPage, firstPage.findResources(), firstPage.getContents().getStream());

        final PDGraphicsState state = engine.getGraphicsState();
        System.out.println(state.getStrokingColor().getColorSpace().getName());

        final float[] components = state.getStrokingColor().getColorSpaceValue();
        for (int i = 0; i < components.length; i++) {
            System.out.println(components[i] * 255);
        }
    } finally {
        doc.close();
    }
}

From source file:pdfsplicer.SplicerModel.java

License:Open Source License

/**
 * Create the new PDF, and save it.
 *
 * <p>For every entry in {@code pageEntryPDFList} the matching source document is looked up
 * in {@code pdfList}, and each page number in the corresponding {@code pageRangeList} entry
 * (1-based) is cloned into the new document, in order. If a source document is encrypted an
 * error is printed and the JVM exits with status 1.
 *
 * @param saveFile the file to save it as
 * @throws IOException if a page cannot be cloned or it cannot save the file
 */
public void makeFinalizedPDF(File saveFile) throws IOException {
    PDDocument newdoc = new PDDocument();
    try {
        for (int i = 0; i < pageEntryPDFList.size(); ++i) {
            PDDocument doc = pdfList.get(pageEntryPDFList.get(i));

            if (doc.isEncrypted()) {
                System.out.println("Error: Encrypted PDF");
                System.exit(1);
            }

            List<Integer> pRange = pageRangeList.get(i);
            PDFCloneUtility pdfCloner = new PDFCloneUtility(newdoc);
            for (int pNum : pRange) {
                // page numbers in the range list are 1-based, getPage() is 0-based
                PDPage page = doc.getPage(pNum - 1);
                COSDictionary clonedDict = (COSDictionary) pdfCloner.cloneForNewDocument(page);
                newdoc.addPage(new PDPage(clonedDict));
            }
        }

        newdoc.save(saveFile);
    } finally {
        // Close the assembled document even when cloning or saving fails.
        newdoc.close();
    }
}

From source file:pdftotext.Pdfprac2.java

/**
 * Extracts the text of a PDF and appends it to a sibling {@code .txt} file.
 *
 * <p>The output path is the input path with a trailing {@code .pdf} replaced by
 * {@code .txt}; if the name does not end in {@code .pdf}, {@code .txt} is appended instead so
 * the output can never be the input file itself. Text is appended, not overwritten.
 *
 * @param fileName path of the PDF to read
 * @return the path of the text file, or {@code null} if {@code fileName} is not an existing
 *         file or the parser could not be opened. When parsing or writing fails later, the
 *         error is printed and the path is still returned, as before.
 */
public String pdftoText(String fileName) {
    final String pdfSuffix = ".pdf";
    // Plain suffix handling: String.replaceAll would treat ".pdf" as a regex ('.' matches any
    // character) and rewrite every occurrence, not just the extension.
    String location = fileName.endsWith(pdfSuffix)
            ? fileName.substring(0, fileName.length() - pdfSuffix.length()) + ".txt"
            : fileName + ".txt";

    File file = new File(fileName);
    if (!file.isFile()) {
        System.err.println("File " + fileName + " does not exist.");
        return null;
    }

    FileInputStream input = null;
    PDDocument pdDoc = null;
    COSDocument cosDoc = null;
    try {
        PDFParser parser;
        try {
            input = new FileInputStream(file);
            parser = new PDFParser(input);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return null;
        }

        try {
            parser.parse();
            cosDoc = parser.getDocument();
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            String parsedText = pdfStripper.getText(pdDoc);
            // second FileWriter argument 'true' = append to an existing file
            try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(location, true)))) {
                out.println(parsedText);
            }
        } catch (IOException e) {
            System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
        }
        return location;
    } finally {
        try {
            if (cosDoc != null)
                cosDoc.close();
            if (pdDoc != null)
                pdDoc.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        // The input stream was opened here, so it is released here, on every path.
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:pdfviewer.Pdfviewer.java

/**
 * Writes an audit copy of a PDF with stamps added to every page.
 *
 * <p>Each page gets the {@code date} text. Pages flagged in
 * {@code statusArray[GlobalVar.VOID_BUTTON_INDEX]} also get the image from {@code void.jpg};
 * pages flagged in {@code statusArray[GlobalVar.SELECT_BUTTON_INDEX]} also get a rotated
 * {@code cycle/sequence} label, where the sequence starts at 1 and advances only on flagged
 * pages. A {@code null} flag is treated as not set. The result is saved next to the input
 * with {@code _forAudit} inserted before {@code .pdf} (or {@code _forAudit.pdf} appended when
 * the name contains no {@code .pdf}).
 *
 * @param date        text of the date stamp
 * @param pdfFileName path of the PDF to stamp
 * @param statusArray per-page flags, indexed {@code [button index][0-based page]}
 * @param cycle       prefix of the sequence label
 * @throws IOException         if the PDF or the image cannot be read, or the copy cannot be written
 * @throws COSVisitorException if saving the document fails
 */
public static void generatePDFFile(String date, String pdfFileName, Boolean[][] statusArray, String cycle)
        throws IOException, COSVisitorException {
    PDDocument pdf = PDDocument.load(pdfFileName);
    try {
        String targetFile;
        if (pdfFileName.contains(".pdf")) { // name carries the .pdf suffix
            targetFile = pdfFileName.replace(".pdf", "_forAudit.pdf");
        } else {
            targetFile = pdfFileName + "_forAudit.pdf";
        }

        // == prepare for void mark
        String imageName = "void.jpg";
        BufferedImage buffered = ImageIO.read(new File(imageName));
        PDJpeg voidMark = new PDJpeg(pdf, buffered);
        // == end of preparing for void mark

        List pages = pdf.getDocumentCatalog().getAllPages();
        int sequenceNum = 1; // start from 0001
        for (int pageNum = 0; pageNum < pages.size(); pageNum++) { // pageNum is 0 based
            PDPage page = (PDPage) pages.get(pageNum);
            PDPageContentStream stream = new PDPageContentStream(pdf, page, true, false);
            try {
                // == date stamp
                stream.beginText();
                stream.setFont(PDType1Font.HELVETICA, 20);
                stream.moveTextPositionByAmount(200, 20);
                stream.drawString(date);
                stream.endText();
                // == end of date stamp

                // == void stamp
                if (Boolean.TRUE.equals(statusArray[GlobalVar.VOID_BUTTON_INDEX][pageNum])) {
                    stream.drawImage(voidMark, 100, 200);
                }
                // == end of void stamp

                // == seq stamp
                if (Boolean.TRUE.equals(statusArray[GlobalVar.SELECT_BUTTON_INDEX][pageNum])) {
                    stream.beginText();
                    stream.setFont(PDType1Font.HELVETICA, 24);
                    stream.moveTextPositionByAmount(600, 400);
                    // rotate text exactly 90 degrees at x = 600, y = 400
                    stream.setTextRotation(Math.PI / 2, 600, 400);

                    stream.drawString(cycle + "/" + globalCounterGenerator(sequenceNum));
                    sequenceNum++;
                    stream.endText();
                }
                // == end of seq stamp
            } finally {
                // Release the page stream even when a drawing call throws.
                stream.close();
            }
        }
        pdf.save(targetFile);
    } finally {
        pdf.close();
    }

}

From source file:pdfviewer.Pdfviewer.java

/**
 * Writes a copy of a PDF with {@code date} drawn on every page.
 *
 * <p>The text is drawn in Helvetica 24 at position (100, 300). The copy is saved next to the
 * input with {@code _DS} inserted before {@code .pdf} (or {@code _DS.pdf} appended when the
 * name contains no {@code .pdf}).
 *
 * @param date        text of the date stamp
 * @param pdfFileName path of the PDF to stamp
 * @return the path of the stamped copy
 * @throws IOException         if the PDF cannot be read or the copy cannot be written
 * @throws COSVisitorException if saving the document fails
 */
public static String dateStampPDFFile(String date, String pdfFileName) throws IOException, COSVisitorException {
    PDDocument pdf = PDDocument.load(pdfFileName);
    try {
        String targetFile;
        if (pdfFileName.contains(".pdf")) { // name carries the .pdf suffix
            targetFile = pdfFileName.replace(".pdf", "_DS.pdf");
        } else {
            targetFile = pdfFileName + "_DS.pdf";
        }

        List pages = pdf.getDocumentCatalog().getAllPages();
        for (Object entry : pages) {
            PDPage page = (PDPage) entry;
            PDPageContentStream stream = new PDPageContentStream(pdf, page, true, false);
            try {
                // == date stamp
                stream.beginText();
                stream.setFont(PDType1Font.HELVETICA, 24);
                stream.moveTextPositionByAmount(100, 300);
                stream.drawString(date);
                stream.endText();
                // == end of date stamp
            } finally {
                // Release the page stream even when a drawing call throws.
                stream.close();
            }
        }
        pdf.save(targetFile);
        return targetFile;
    } finally {
        pdf.close();
    }
}