Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.load.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:de.micromata.genome.gwiki.plugin.pdftextextractor_1_0.PdfTextExtractor.java

License:Apache License

/**
 * Extracts the text of a PDF read from the given stream.
 *
 * @param fileName name of the source file; not used by this method
 * @param data stream the PDF is loaded from
 * @return the text written by the {@code PDFTextStripper}
 * @throws RuntimeIOException wrapping any {@code IOException} raised while loading,
 *         extracting text from, or closing the document
 */
public String extractText(String fileName, InputStream data) {
    try {
        PDDocument doc = PDDocument.load(data);
        try {
            PDFTextStripper st = new PDFTextStripper("UTF-8");
            StringWriter sout = new StringWriter();
            st.writeText(doc, sout);
            return sout.toString();
        } finally {
            // Close even when text extraction fails so the document is not leaked.
            doc.close();
        }
    } catch (IOException ex) {
        throw new RuntimeIOException(ex);
    }
}

From source file:de.mirkosertic.desktopsearch.pdfpreview.PDFPreviewGenerator.java

License:Open Source License

/**
 * Renders the first page of the given PDF into a thumbnail-sized preview.
 *
 * @param aFile the PDF file to render
 * @return the preview, or {@code null} when the document has no pages or
 *         any error occurs (errors are logged)
 */
@Override
public synchronized Preview createPreviewFor(File aFile) {
    PDDocument theDocument = null;
    Graphics2D theGraphics = null;
    try {
        theDocument = PDDocument.load(aFile);
        List<?> thePages = theDocument.getDocumentCatalog().getAllPages();
        if (thePages.isEmpty()) {
            return null;
        }
        PDPage theFirstPage = (PDPage) thePages.get(0);

        PDRectangle mBox = theFirstPage.findMediaBox();
        float theWidthPt = mBox.getWidth();
        float theHeightPt = mBox.getHeight();
        // Scale so that the page width maps onto the thumbnail width.
        float theScaling = THUMB_WIDTH / theWidthPt;

        Dimension thePageDimension = new Dimension((int) theWidthPt, (int) theHeightPt);
        BufferedImage theImage = new BufferedImage(THUMB_WIDTH, THUMB_HEIGHT, BufferedImage.TYPE_INT_RGB);
        theGraphics = (Graphics2D) theImage.getGraphics();
        theGraphics.setBackground(new Color(255, 255, 255, 0));

        theGraphics.clearRect(0, 0, theImage.getWidth(), theImage.getHeight());
        theGraphics.scale(theScaling, theScaling);
        PageDrawer theDrawer = new PageDrawer();
        theDrawer.drawPage(theGraphics, theFirstPage, thePageDimension);

        // NOTE(review): this method used to build a rotated copy of the image for pages
        // with a rotation of 90 or 270 degrees, but that copy was discarded and never
        // reached the returned preview. The dead work was removed without changing the
        // output; confirm whether page rotation should actually be applied here.

        return new Preview(ImageUtils.rescale(theImage, THUMB_WIDTH, THUMB_HEIGHT,
                ImageUtils.RescaleMethod.RESIZE_FIT_ONE_DIMENSION));
    } catch (Exception e) {
        LOGGER.error("Error creating preview for " + aFile, e);
        return null;
    } finally {
        // Release the graphics context on every path, including failures.
        if (theGraphics != null) {
            theGraphics.dispose();
        }
        // Loading may have failed, in which case there is nothing to close.
        if (theDocument != null) {
            try {
                theDocument.close();
            } catch (Exception e) {
                LOGGER.error("Error closing document for " + aFile, e);
            }
        }
    }
}

From source file:de.offis.health.icardea.cied.pdf.extractor.PDFApachePDFBoxExtractor.java

License:Apache License

/**
 * Opens the PDF at the given path and stores it in {@code pdfDocument}.
 * Also records the absolute file path and the path of its parent directory.
 *
 * @param fullPDFFilePath path to the PDF file
 * @return {@code true} when the document was opened; failures are reported
 *         through exceptions rather than a {@code false} return
 * @throws IOException if PDFBox fails to load the file
 * @throws Exception if the path is {@code null}, or does not denote a readable file
 */
public boolean openDocument(String fullPDFFilePath) throws IOException, Exception {
    if (fullPDFFilePath == null) {
        throw new Exception("There is no full path to a file given.");
    }

    File pdfFile = new File(fullPDFFilePath).getAbsoluteFile();
    if (!pdfFile.isFile() || !pdfFile.canRead()) {
        throw new Exception("The given PDF file is not a file or not readable (check permissions).");
    }

    this.fullPDFFilePath = pdfFile.getPath();
    // The directory is the parent of the absolute file, not the file path itself.
    this.fullPDFDirectoryPath = pdfFile.getParent();

    logger.debug("FilePath.....: " + this.fullPDFFilePath);
    logger.debug("DirectoryPath: " + this.fullPDFDirectoryPath);

    // Open the PDF file
    pdfDocument = PDDocument.load(this.fullPDFFilePath);

    logger.debug("PDF contains pages: " + pdfDocument.getNumberOfPages());

    return true;
}

From source file:de.oio.jpdfunit.document.pdflibimpl.PdfBoxAnalyser.java

License:Open Source License

/**
 * This constructor uses a String parameter to instantiate the PDDocument.
 *
 * @param file
 *        The path and the file as String. I.e. "/home/bbratkus/test.pdf".
 * @throws IllegalArgumentException
 *         if {@code file} is {@code null} or empty, or the document cannot be
 *         loaded (the underlying {@code IOException} is attached as the cause)
 * @throws IOException
 *         declared by the signature; presumably raised by the follow-up
 *         initialisation calls (not visible here)
 */
public PdfBoxAnalyser(final String file) throws IOException {
    // Test for null first: calling equals on a null reference would throw
    // a NullPointerException before the intended argument check could run.
    if ((file == null) || file.equals("")) //$NON-NLS-1$
    {
        throw new IllegalArgumentException(PARAMETER);
    }
    try {
        pdDocument = PDDocument.load(file);
    } catch (final IOException ioe) {
        // Keep the original failure as the cause so it is not lost.
        throw new IllegalArgumentException(NODOCINIT, ioe);
    }
    setContentAsStringBuffer();
    getDocumentFonts();
}

From source file:de.oio.jpdfunit.document.pdflibimpl.PdfBoxAnalyser.java

License:Open Source License

/**
 * This constructor uses an InputStream as parameter to instantiate the
 * PDDocument.
 *
 * @param pdfStream
 *        The stream which the pdf file is within.
 * @throws IllegalArgumentException
 *         if {@code pdfStream} is {@code null}, or the document cannot be
 *         loaded (the underlying {@code IOException} is attached as the cause)
 * @throws IOException
 *         declared by the signature; presumably raised by the follow-up
 *         initialisation calls (not visible here)
 */
public PdfBoxAnalyser(final InputStream pdfStream) throws IOException {
    if (pdfStream == null) {
        throw new IllegalArgumentException(PARAMETER);
    }
    try {
        pdDocument = PDDocument.load(pdfStream);
    } catch (final IOException ioe) {
        // Keep the original failure as the cause so it is not lost.
        throw new IllegalArgumentException(NODOCINIT, ioe);
    }
    setContentAsStringBuffer();
    getDocumentFonts();
}

From source file:de.prozesskraft.pkraft.Createdoc.java

/**
 * merge the pdfs/*from  w ww .j  a  va  2 s.c  o m*/
 */
private static void mergePdf(Map<String, String> pdfRankFiles, String output) {
    System.out.println("merging pdfs to a single file");

    Set<String> keySet = pdfRankFiles.keySet();
    ArrayList<String> listKey = new ArrayList(keySet);
    Collections.sort(listKey);

    try {
        PDDocument document = new PDDocument();
        //         if(document.getNumberOfPages() > 0)
        //         {
        //            System.out.println("deleting empty page");
        //            document.removePage(0);
        //         }
        for (String actualKey : listKey) {

            PDDocument part = PDDocument.load(pdfRankFiles.get(actualKey));
            System.out.println("merging " + pdfRankFiles.get(actualKey));
            ArrayList<PDPage> list = (ArrayList<PDPage>) part.getDocumentCatalog().getAllPages();
            for (PDPage page : list) {
                document.addPage(page);
            }

        }
        try {
            System.out.println("writing " + output);
            document.save(output);
        } catch (COSVisitorException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:de.redsix.pdfcompare.PdfComparator.java

License:Apache License

/**
 * Loads a PDF from the given stream and passes it, together with a renderer
 * created for it, to {@code addExtraPages}. The document is closed afterwards.
 *
 * @param expectedPdfIS stream the PDF is loaded from
 * @param markerColor forwarded unchanged to {@code addExtraPages}
 * @throws IOException if loading or closing the document fails, or if thrown
 *         by {@code addExtraPages}
 */
private void addSingleDocumentToResult(InputStream expectedPdfIS, int markerColor) throws IOException {
    try (PDDocument document = PDDocument.load(expectedPdfIS)) {
        addExtraPages(document, new PDFRenderer(document), 0, markerColor, true);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.io.pdf.Pdf2CasConverter.java

License:Apache License

/**
 * Loads a PDF from the stream and writes its text via {@code writeText(PDDocument)},
 * after storing the target CAS and a fresh text buffer in the converter's fields.
 *
 * @param aCas the CAS stored in {@code cas} before conversion
 * @param aIs stream the PDF is loaded from
 * @throws IOException if the document is encrypted, or loading, conversion
 *         or closing fails
 */
public void writeText(final CAS aCas, final InputStream aIs) throws IOException {
    final PDDocument document = PDDocument.load(aIs);

    try {
        final boolean encrypted = document.isEncrypted();
        if (encrypted) {
            throw new IOException("Encrypted documents currently not supported");
        }

        // Reset the per-document state before the conversion starts.
        cas = aCas;
        text = new StringBuilder();

        writeText(document);
    } finally {
        // The document is released whether or not the conversion succeeded.
        document.close();
    }
}

From source file:de.uni_koeln.ub.drc.reader.PdfContentExtractor.java

License:Open Source License

/**
 * @param pdfName
 *            The full path to the PDF file to extract content from
 * @return The PageInfo object for the PDF, or {@code null} if an
 *         {@code IOException} occurred (its stack trace is printed)
 */
public static PageInfo extractContentFromPdf(String pdfName) {
    try {
        location = pdfName;
        PDDocument document = PDDocument.load(new File(pdfName));
        try {
            PdfContentExtractor x = initExtractor(document);
            return x.toPageInfo();
        } finally {
            // Close even when extraction fails so the document is not leaked.
            document.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:de.uni_siegen.wineme.come_in.thumbnailer.thumbnailers.PDFBoxThumbnailer.java

License:Open Source License

/**
 * Generates a thumbnail of the first page of a PDF and writes it to {@code output}.
 * Any existing output file is deleted first.
 *
 * @param input the PDF file to read
 * @param output the file the thumbnail is written to
 * @throws IOException if the PDF contains no pages or writing the image fails
 * @throws ThumbnailerException if the PDF cannot be loaded
 */
@Override
public void generateThumbnail(final File input, final File output) throws IOException, ThumbnailerException {

    FileUtils.deleteQuietly(output);

    PDDocument document = null;
    try {
        try {
            document = PDDocument.load(input);
        } catch (final IOException e) {
            throw new ThumbnailerException("Could not load PDF File", e);
        }

        final List<?> pages = document.getDocumentCatalog().getAllPages();
        if (pages.isEmpty()) {
            // Fail with a declared, descriptive exception instead of an
            // IndexOutOfBoundsException from pages.get(0).
            throw new IOException("PDF File contains no pages: " + input);
        }
        final PDPage page = (PDPage) pages.get(0);
        final BufferedImage tmpImage = this.writeImageForPage(document, page, BufferedImage.TYPE_INT_RGB);

        if (tmpImage.getWidth() == this.thumbWidth) {
            ImageIO.write(tmpImage, PDFBoxThumbnailer.OUTPUT_FORMAT, output);
        } else {
            final ResizeImage resizer = new ResizeImage(this.thumbWidth, this.thumbHeight);
            resizer.resizeMethod = ResizeImage.NO_RESIZE_ONLY_CROP;
            resizer.setInputImage(tmpImage);
            resizer.writeOutput(output);
        }
    } finally {
        if (document != null) {
            try {
                document.close();
            } catch (final IOException ignored) {
                // Best-effort cleanup: a failure to close must not mask the
                // outcome of the thumbnail generation itself.
            }
        }
    }
}