Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument load.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:org.olat.search.service.document.file.PdfDocument.java

License:Apache License

/**
 * Extracts the text content of the PDF stored in the given leaf.
 *
 * <p>The leaf's input stream is buffered and parsed with {@code PDDocument.load}. If the
 * document reports itself as encrypted, a decryption with the empty password is attempted
 * before the text is stripped.
 *
 * @param leaf the leaf whose input stream contains the PDF
 * @return the text returned by {@code PDFTextStripper.getText} for the loaded document
 * @throws IOException if reading, parsing, stripping or closing fails
 * @throws DocumentAccessException if the document is encrypted and decrypting it with the
 *         empty password fails
 */
private String extractTextFromPdf(final VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) {
        log.debug("readContent from pdf starts...");
    }
    final BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
    try {
        final PDDocument document = PDDocument.load(bis);
        try {
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (final Exception e) {
                    // NOTE(review): the cause is not chained because the constructors of
                    // DocumentAccessException are not visible here - chain `e` if a
                    // (String, Throwable) constructor exists.
                    throw new DocumentAccessException(
                            "PDF is encrypted. Can not read content file=" + leaf.getName());
                }
            }
            if (log.isDebug()) {
                log.debug("readContent PDDocument loaded");
            }
            final PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(document);
        } finally {
            document.close();
        }
    } finally {
        // Nested finally: the stream is closed even when document.close() itself throws.
        bis.close();
    }
}

From source file:org.omegat.filters2.pdf.PdfFilter.java

License:Open Source License

/**
 * Creates a reader over the text extracted from the given PDF file.
 *
 * <p>The text is extracted with a {@code PDFTextStripper} configured to use {@code "\n"} as
 * line separator and to sort text by position. The whole text is held in memory and served
 * through a {@code StringReader}.
 *
 * @param infile the PDF file to read
 * @param encoding not used by this implementation
 * @return a buffered reader over the extracted text
 * @throws IOException if the file cannot be loaded, stripped or closed
 */
@Override
public BufferedReader createReader(File infile, String encoding) throws IOException {
    PDFTextStripper stripper = new PDFTextStripper();
    stripper.setLineSeparator("\n");
    stripper.setSortByPosition(true);

    PDDocument document = PDDocument.load(infile.getAbsolutePath());
    String text;
    try {
        text = stripper.getText(document);
    } finally {
        // Release the document even when text extraction fails.
        document.close();
    }

    return new BufferedReader(new StringReader(text));
}

From source file:org.opencps.util.ExtractTextLocations.java

License:Open Source License

/**
 * Loads the PDF at the given path, processes the content stream of its first page and
 * records that page's media box dimensions and corner coordinates.
 *
 * <p>If the document reports itself as encrypted, a decryption with
 * {@code StringPool.BLANK} is attempted; a failure is logged and processing continues.
 * Any exception raised while loading or processing is logged and not rethrown. Only a
 * failure while closing the document propagates to the caller.
 *
 * @param fullPath path of the PDF file to load
 * @throws IOException if closing the loaded document fails
 */
public ExtractTextLocations(String fullPath) throws IOException {

    PDDocument document = null;

    try {
        document = PDDocument.load(new File(fullPath));

        if (document.isEncrypted()) {
            try {
                document.decrypt(StringPool.BLANK);
            } catch (Exception e) {
                _log.error(e);
            }
        }

        List<?> allPages = document.getDocumentCatalog().getAllPages();
        if (allPages != null && !allPages.isEmpty()) {
            // Only the first page is inspected.
            PDPage page = (PDPage) allPages.get(0);

            PDStream contents = page.getContents();
            if (contents != null) {
                this.processStream(page, page.findResources(), contents.getStream());
            }

            PDRectangle pageSize = page.findMediaBox();
            if (pageSize != null) {
                setPageWidth(pageSize.getWidth());
                setPageHeight(pageSize.getHeight());
                setPageLLX(pageSize.getLowerLeftX());
                setPageURX(pageSize.getUpperRightX());
                setPageLLY(pageSize.getLowerLeftY());
                setPageURY(pageSize.getUpperRightY());
            }
        }
    } catch (Exception e) {
        _log.error(e);
    } finally {
        if (document != null) {
            document.close();
        }
    }
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

/**
 * Splits the PDF at {@code inputUri} into several documents at the given page numbers and
 * saves each part.
 *
 * <p>Each entry of {@code pages} is treated as the 1-based number of the first page of the
 * next part: a part collects the pages from the current position up to the page before that
 * entry, and a final part collects all remaining pages. Every part is saved under the name
 * produced by {@code converterUtils.addSubIndexBeforeExtension(outputUri, n)}, with
 * {@code n} counting up from 1.
 *
 * @param inputUri location of the PDF to split; must not be blank
 * @param outputUri base name for the generated files; must not be blank
 * @param pages page numbers at which to split; must not be empty
 * @return the names of the saved files, in part order
 * @throws IOException if loading, saving or closing a document fails
 * @throws COSVisitorException if saving a part fails
 * @throws IllegalArgumentException if any argument is blank or empty
 */
@Override
public List<String> split(final String inputUri, final String outputUri, final List<Integer> pages)
        throws IOException, COSVisitorException {

    final List<String> result = new ArrayList<String>();

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri)
            && CollectionUtils.isNotEmpty(pages)) {

        final PDDocument doc = PDDocument.load(inputUri);
        final List<PDDocument> splittedDocs = new ArrayList<PDDocument>();
        try {
            @SuppressWarnings("unchecked")
            final List<PDPage> pagesList = doc.getDocumentCatalog().getAllPages();

            // Create one document per split position; the remainder is handled below.
            // Each document is registered before pages are added so it is always closed.
            int currentPage = 0;
            for (final Integer page : pages) {
                final PDDocument document = new PDDocument();
                splittedDocs.add(document);
                for (int i = currentPage; i <= page - 2; i++) {
                    document.addPage(pagesList.get(i));
                }
                currentPage = page - 1;
            }

            // The last part holds every page after the final split position.
            final PDDocument lastDocument = new PDDocument();
            splittedDocs.add(lastDocument);
            for (int i = currentPage; i < pagesList.size(); i++) {
                lastDocument.addPage(pagesList.get(i));
            }

            // The parts must still be open here: saving a closed document is invalid.
            int subIndex = 1;
            for (final PDDocument document : splittedDocs) {
                final String extension = this.converterUtils.addSubIndexBeforeExtension(outputUri, subIndex++);
                document.save(extension);
                result.add(extension);
            }
        } finally {
            try {
                for (final PDDocument document : splittedDocs) {
                    document.close();
                }
            } finally {
                doc.close();
            }
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }

    return result;
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

/**
 * Splits the PDF at {@code inputUri} with a {@code Splitter} configured via
 * {@code setSplitAtPage(pages)} and saves every resulting document.
 *
 * <p>Every part is saved under the name produced by
 * {@code converterUtils.addSubIndexBeforeExtension(outputUri, n)}, with {@code n} counting
 * up from 1.
 *
 * @param inputUri location of the PDF to split; must not be blank
 * @param outputUri base name for the generated files; must not be blank
 * @param pages value handed to {@code Splitter.setSplitAtPage}; must not be {@code null}
 * @return the names of the saved files, in part order
 * @throws IOException if loading, splitting, saving or closing a document fails
 * @throws COSVisitorException if saving a part fails
 * @throws IllegalArgumentException if any argument is blank or {@code null}
 */
@Override
public List<String> split(final String inputUri, final String outputUri, final Integer pages)
        throws IOException, COSVisitorException {

    final List<String> result = new ArrayList<String>();

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && pages != null) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final Splitter splitter = new Splitter();
            splitter.setSplitAtPage(pages);

            final List<PDDocument> splittedDocs = splitter.split(doc);
            try {
                int subIndex = 1;
                for (final PDDocument document : splittedDocs) {
                    final String extension = this.converterUtils.addSubIndexBeforeExtension(outputUri,
                            subIndex++);
                    document.save(extension);
                    result.add(extension);
                }
            } finally {
                // Close every part, including those never saved because of a failure.
                for (final PDDocument document : splittedDocs) {
                    document.close();
                }
            }
        } finally {
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }

    return result;
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

/**
 * Applies a password protection policy to the PDF at {@code inputUri} and saves the result
 * to {@code outputUri}.
 *
 * <p>The given password is passed as both password arguments of the
 * {@code StandardProtectionPolicy}, together with a default-constructed
 * {@code AccessPermission}.
 *
 * @param inputUri location of the PDF to protect; must not be blank
 * @param outputUri location the protected PDF is saved to; must not be blank
 * @param password password to protect the document with; must not be blank
 * @throws IOException if loading, saving or closing the document fails
 * @throws BadSecurityHandlerException if the protection policy cannot be applied
 * @throws COSVisitorException if saving the document fails
 * @throws IllegalArgumentException if any argument is blank
 */
@Override
public void protect(final String inputUri, final String outputUri, final String password)
        throws IOException, BadSecurityHandlerException, COSVisitorException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri)
            && StringUtils.isNotBlank(password)) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final StandardProtectionPolicy pp = new StandardProtectionPolicy(password, password,
                    new AccessPermission());
            doc.protect(pp);

            doc.save(outputUri);
        } finally {
            // Release the document even when protecting or saving fails.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

/**
 * Opens the protected PDF at {@code inputUri} with the given password, replaces its
 * protection with a policy that has {@code null} passwords, and saves the result to
 * {@code outputUri}.
 *
 * @param inputUri location of the protected PDF; must not be blank
 * @param outputUri location the resulting PDF is saved to; must not be blank
 * @param password password used to open the document; must not be blank
 * @throws IOException if loading, saving or closing the document fails
 * @throws COSVisitorException if saving the document fails
 * @throws BadSecurityHandlerException if the protection cannot be opened or applied
 * @throws CryptographyException if decrypting the document fails
 * @throws IllegalArgumentException if any argument is blank
 */
@Override
public void unProtect(final String inputUri, final String outputUri, final String password)
        throws IOException, COSVisitorException, BadSecurityHandlerException, CryptographyException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri)
            && StringUtils.isNotBlank(password)) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final DecryptionMaterial decryptionMaterial = new StandardDecryptionMaterial(password);
            doc.openProtection(decryptionMaterial);

            final StandardProtectionPolicy pp = new StandardProtectionPolicy(null, null,
                    new AccessPermission());
            doc.protect(pp);

            doc.save(outputUri);
        } finally {
            // Release the document even when a wrong password or a save failure aborts the work.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

/**
 * Adds a text watermark to pages of the PDF at {@code inputUri} and saves the result to
 * {@code outputUri}.
 *
 * <p>The page list of the document is first filtered by
 * {@code converterUtils.deleteNonSelectedPositions(allPages, pages)}; every page remaining
 * in that list gets its graphic state set up and the watermark added.
 *
 * @param inputUri location of the PDF to watermark; must not be blank
 * @param outputUri location the watermarked PDF is saved to; must not be blank
 * @param text watermark text; must not be blank
 * @param color watermark color; must not be {@code null}
 * @param alpha value handed to {@code watermarkUtils.setUpGraphicState}; must not be
 *        {@code null}
 * @param watermarkPosition where the watermark is placed; must not be {@code null}
 * @param pages page selection handed to {@code converterUtils.deleteNonSelectedPositions}
 * @throws IOException if loading, saving or closing the document fails
 * @throws COSVisitorException if saving the document fails
 * @throws WatermarkOutOfLengthException if the position is not {@code CENTER} and the text
 *         is longer than {@code watermarkPosition.getMaxLengthPortrait()}, or if
 *         {@code watermarkUtils.addWatermark} rejects the text
 * @throws IllegalArgumentException if a required argument is blank or {@code null}
 */
@Override
public void putWatermark(final String inputUri, final String outputUri, final String text, final Color color,
        final Float alpha, final WatermarkPosition watermarkPosition, final List<Integer> pages)
        throws IOException, COSVisitorException, WatermarkOutOfLengthException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && StringUtils.isNotBlank(text)
            && color != null && alpha != null && watermarkPosition != null) {

        // If watermark position is not centered, then max length is the
        // same for landscape and portrait, text length is tested here so
        // there is no need to continue.
        if (!WatermarkPosition.CENTER.equals(watermarkPosition)
                && text.length() > watermarkPosition.getMaxLengthPortrait()) {
            throw new WatermarkOutOfLengthException(Constants.WATERMARK_OUT_OF_LENGTH_EXCEPTION_MESSAGE);
        }

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final List<?> allPages = doc.getDocumentCatalog().getAllPages();

            this.converterUtils.deleteNonSelectedPositions(allPages, pages);

            if (CollectionUtils.isNotEmpty(allPages)) {
                for (final Object object : allPages) {
                    final PDPage page = (PDPage) object;

                    // The transparency/opacity of graphic objects cannot be set
                    // directly on the drawing commands; it needs to be set on a
                    // graphic state which becomes part of the resources.
                    this.watermarkUtils.setUpGraphicState(page, alpha);

                    // With the graphic state in place, the watermark can be drawn.
                    this.watermarkUtils.addWatermark(doc, page, color, text, watermarkPosition);
                }
            }

            doc.save(outputUri);
        } finally {
            // Single close point: covers success, WatermarkOutOfLengthException and
            // every other failure.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

/**
 * Replaces the outline of the PDF at {@code inputUri} with a new one and saves the result
 * to {@code outputUri}.
 *
 * <p>The new outline has a single top-level item named {@code title}. Each non-null,
 * initialized bookmark becomes a child of that item, titled with the bookmark's text and
 * pointing (fit-width destination) at the page with index {@code getPage() - 1}.
 *
 * @param inputUri location of the PDF to modify; must not be blank
 * @param outputUri location the resulting PDF is saved to; must not be blank
 * @param title title of the top-level outline item; must not be empty
 * @param pdfGalBookmarksList bookmarks to add; must not be empty
 * @throws IOException if loading, saving or closing the document fails
 * @throws COSVisitorException if saving the document fails
 * @throws IllegalArgumentException if any argument is blank or empty
 */
@Override
public void addBookmarks(final String inputUri, final String outputUri, final String title,
        final List<PDFGalBookmark> pdfGalBookmarksList) throws IOException, COSVisitorException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri) && StringUtils.isNotEmpty(title)
            && CollectionUtils.isNotEmpty(pdfGalBookmarksList)) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final PDDocumentOutline outline = new PDDocumentOutline();
            doc.getDocumentCatalog().setDocumentOutline(outline);
            final PDOutlineItem pagesOutline = new PDOutlineItem();
            pagesOutline.setTitle(title);
            @SuppressWarnings("unchecked")
            final List<PDPage> pages = doc.getDocumentCatalog().getAllPages();
            outline.appendChild(pagesOutline);

            for (final PDFGalBookmark pdfGalBookmark : pdfGalBookmarksList) {
                if (pdfGalBookmark != null && pdfGalBookmark.isInitializated()) {
                    // Bookmark pages are 1-based; the page list is 0-based.
                    final PDPage page = pages.get(pdfGalBookmark.getPage() - 1);
                    final PDPageFitWidthDestination dest = new PDPageFitWidthDestination();
                    dest.setPage(page);
                    final PDOutlineItem bookmark = new PDOutlineItem();
                    bookmark.setDestination(dest);
                    bookmark.setTitle(pdfGalBookmark.getText());
                    pagesOutline.appendChild(bookmark);
                }
            }
            pagesOutline.openNode();
            outline.openNode();

            doc.save(outputUri);
        } finally {
            // Release the document even when a bookmark page is out of range or saving fails.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}

From source file:org.pdfgal.pdfgal.pdfgal.impl.PDFGalImpl.java

License:Open Source License

@Override
/**
 * Replaces the page labels of the PDF at {@code inputUri} and saves the result to
 * {@code outputUri}.
 *
 * <p>For every initialized entry a {@code PDPageLabelRange} with the entry's numbering
 * style is registered at page index {@code getPageNumber() - 1}.
 *
 * @param inputUri location of the PDF to modify; must not be blank
 * @param outputUri location the resulting PDF is saved to; must not be blank
 * @param pdfGalPageNumberingList page numbering definitions; must not be empty
 * @throws IOException if loading, saving or closing the document fails
 * @throws COSVisitorException if saving the document fails
 * @throws IllegalArgumentException if any argument is blank or empty
 */
public void reIndexPageNumbers(final String inputUri, final String outputUri,
        final List<PDFGalPageNumbering> pdfGalPageNumberingList) throws IOException, COSVisitorException {

    if (StringUtils.isNotBlank(inputUri) && StringUtils.isNotBlank(outputUri)
            && CollectionUtils.isNotEmpty(pdfGalPageNumberingList)) {

        final PDDocument doc = PDDocument.load(inputUri);
        try {
            final PDPageLabels pdPageLabels = new PDPageLabels(doc);

            for (final PDFGalPageNumbering pageNumbering : pdfGalPageNumberingList) {
                if (pageNumbering.isInitializated()) {
                    final PDPageLabelRange pdPageLabelRange = new PDPageLabelRange();
                    pdPageLabelRange.setStyle(pageNumbering.getNumberingStyle().getValue());
                    // Page numbers are 1-based; label items are keyed by 0-based index.
                    pdPageLabels.setLabelItem(pageNumbering.getPageNumber() - 1, pdPageLabelRange);
                }
            }

            doc.getDocumentCatalog().setPageLabels(pdPageLabels);

            doc.save(outputUri);
        } finally {
            // Release the document even when building the labels or saving fails.
            doc.close();
        }

    } else {
        throw new IllegalArgumentException(Constants.ILLEGAL_ARGUMENT_EXCEPTION_MESSAGE);
    }
}