Java tutorial
/* * Copyright 2010-2011 Øyvind Berg (elacin@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.elacin.pdfextract.datasource.pdfbox; import org.apache.log4j.Logger; import org.apache.log4j.MDC; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.elacin.pdfextract.Constants; import org.elacin.pdfextract.datasource.DocumentContent; import org.elacin.pdfextract.datasource.PDFSource; import org.elacin.pdfextract.datasource.RenderedPage; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; /** * Created by IntelliJ IDEA. User: elacin Date: 15.01.11 Time: 19.57 To change this template use * File | Settings | File Templates. 
*/ public class PDFBoxSource implements PDFSource { // ------------------------------ FIELDS ------------------------------ private static final Logger log = Logger.getLogger(PDFBoxSource.class); private DocumentContent contents; @NotNull private final PDDocument doc; private final int endPage; @NotNull public final File pdfDocument; private final int startPage; // --------------------------- CONSTRUCTORS --------------------------- public PDFBoxSource(@NotNull File pdfDocument, int startPage, int endPage, String password) { this.pdfDocument = pdfDocument; this.startPage = startPage; this.endPage = endPage; doc = openPdfDocument(pdfDocument, password); } // ------------------------ INTERFACE METHODS ------------------------ // --------------------- Interface PDFSource --------------------- @NotNull public DocumentContent readPages() { if (contents != null) { return contents; } final long t0 = System.currentTimeMillis(); PDFBoxIntegration pdfbox; try { pdfbox = new PDFBoxIntegration(doc, startPage, endPage); pdfbox.processDocument(); } catch (IOException e) { throw new RuntimeException("Error while reading document", e); } final long td = System.currentTimeMillis() - t0; log.info("LOG01190:Read document in " + td + " ms"); contents = pdfbox.getContents(); return contents; } @NotNull public RenderedPage renderPage(int pageNum) { final PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(pageNum - 1); final BufferedImage image; try { image = page.convertToImage(8, Constants.RENDER_DPI); } catch (IOException e) { throw new RuntimeException(e); } float xScale = (float) image.getWidth() / page.findMediaBox().getWidth(); float yScale = (float) image.getHeight() / page.findMediaBox().getHeight(); return new RenderedPage(image, xScale, yScale); } public void closeSource() { try { doc.close(); } catch (IOException e) { log.warn("LOG01250:Error while closing PDF document", e); } } // -------------------------- STATIC METHODS -------------------------- @NotNull 
protected static PDDocument openPdfDocument(@NotNull final File pdfFile, @Nullable final String password) { long t0 = System.currentTimeMillis(); MDC.put("doc", pdfFile.getName()); log.info("LOG00120:Opening PDF file " + pdfFile + "."); try { final PDDocument document = PDDocument.load(pdfFile); if (document.isEncrypted()) { if (password != null) { try { document.decrypt(password); } catch (Exception e) { throw new RuntimeException("Error while reading encrypted PDF:", e); } } else { log.warn("File claims to be encrypted, a password should be provided"); } } log.debug("load()took" + (System.currentTimeMillis() - t0) + "ms"); return document; } catch (IOException e) { MDC.put("doc", ""); throw new RuntimeException("Error while reading " + pdfFile + ".", e); } } }