Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find usage examples for org.apache.pdfbox.pdmodel PDDocument load.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:org.deepfs.fsml.xdcr.PDFTransducer.java

/**
 * Reads the PDF at the given path and returns its metadata and text content
 * as a sequence of {@code keyValue(...)} entries.
 * <p>
 * The entries are emitted in this order: title, subject, creator, author,
 * producer, date, content, keywords (each key prefixed with {@code NS}).
 * Failures are reported on standard error and whatever was collected up to
 * that point is returned, so the result may be empty or partial.
 *
 * @param path location of the PDF file to read
 * @return the concatenated key/value entries; never {@code null}
 */
@Override
public String read(final String path) {
    PDDocument pd = null;
    final StringWriter sw = new StringWriter();
    final StringBuilder sb = new StringBuilder(128);
    try {
        pd = PDDocument.load(path);
        PDDocumentInformation info = pd.getDocumentInformation();
        PDFTextStripper stripper = new PDFTextStripper();
        // Stop text extraction after NO_PAGES pages (constant defined elsewhere in the class).
        stripper.setEndPage(NO_PAGES);
        stripper.writeText(pd, sw);
        sb.append(keyValue(NS + "title", info.getTitle()));
        sb.append(keyValue(NS + "subject", info.getSubject()));
        sb.append(keyValue(NS + "creator", info.getCreator()));
        sb.append(keyValue(NS + "author", info.getAuthor()));
        sb.append(keyValue(NS + "producer", info.getProducer()));
        sb.append(keyValue(NS + "date",
                info.getCreationDate() != null ? new SimpleDateFormat().format(info.getCreationDate().getTime())
                        : null));
        sb.append(keyValue(NS + "content", sw.getBuffer().toString()));

        sb.append(keyValue(NS + "keywords", info.getKeywords()));
    } catch (IOException e) {
        e.printStackTrace();
    } catch (IndexOutOfBoundsException oe) {
        // [MS] thanks apache pdfbox :-)
        System.err.println(oe.getMessage());
    } finally {
        // Always release the document; it was previously leaked on every call.
        if (pd != null) {
            try {
                pd.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return sb.toString();
}

From source file:org.deidentifier.arx.certificate.ARXCertificate.java

License:Apache License

/**
 * Renders the document into the given output stream
 * /*from  ww  w  .  j a v  a  2  s.c om*/
 * @param stream
 * @throws IOException 
 */
public void save(OutputStream stream) throws IOException {

    // Render
    Document document = new Document(style.gethMargin(), style.gethMargin(), style.getvMargin(),
            style.getvMargin());
    for (Element element : this.elements) {
        element.render(document, 0, this.style);
    }

    // Save to temp file
    File tmp = File.createTempFile("arx", "certificate");
    document.save(tmp);

    // Load and watermark
    PDDocument pdDocument = PDDocument.load(tmp);
    Watermark watermark = new Watermark(pdDocument);
    watermark.mark(pdDocument);

    // Save
    pdDocument.save(stream);
    pdDocument.close();
    tmp.delete();
}

From source file:org.deidentifier.arx.certificate.resources.Watermark.java

License:Apache License

/**
 * Creates a new instance by loading the bundled {@code watermark.pdf}
 * classpath resource.
 *
 * @param document the document to be watermarked (not used by this constructor)
 * @throws IOException if the watermark resource is missing or cannot be parsed
 */
public Watermark(PDDocument document) throws IOException {
    java.io.InputStream resource = Watermark.class.getResourceAsStream("watermark.pdf");
    if (resource == null) {
        // Fail with a clear message instead of an opaque failure inside the parser.
        throw new IOException("Watermark resource not found: watermark.pdf");
    }
    try {
        // PDDocument.load(InputStream) copies the stream contents, so the
        // stream can be closed as soon as loading has finished.
        this.watermark = PDDocument.load(resource);
    } finally {
        resource.close();
    }
}

From source file:org.dspace.app.mediafilter.PDFBoxThumbnail.java

License:BSD License

/**
 * @param source/*from www  . ja  va  2 s.  c  o m*/
 *            source input stream
 * 
 * @return InputStream the resulting input stream
 */
@Override
public InputStream getDestinationStream(Item currentItem, InputStream source, boolean verbose)
        throws Exception {
    PDDocument doc = PDDocument.load(source);
    PDFRenderer renderer = new PDFRenderer(doc);
    BufferedImage buf = renderer.renderImage(0);
    //        ImageIO.write(buf, "PNG", new File("custom-render.png"));
    doc.close();

    JPEGFilter jpegFilter = new JPEGFilter();
    return jpegFilter.getThumb(currentItem, buf, verbose);
}

From source file:org.dspace.app.mediafilter.PDFFilter.java

License:BSD License

/**
 * @param source/* ww  w . j  a v a 2s .  c o  m*/
 *            source input stream
 *
 * @return InputStream the resulting input stream
 */
public InputStream getDestinationStream(InputStream source) throws Exception {
    try {
        boolean useTemporaryFile = ConfigurationManager.getBooleanProperty("pdffilter.largepdfs", false);

        // get input stream from bitstream
        // pass to filter, get string back
        PDFTextStripper pts = new PDFTextStripper();
        PDDocument pdfDoc = null;
        Writer writer = null;
        File tempTextFile = null;
        ByteArrayOutputStream byteStream = null;

        if (useTemporaryFile) {
            tempTextFile = File.createTempFile("dspacepdfextract" + source.hashCode(), ".txt");
            tempTextFile.deleteOnExit();
            writer = new OutputStreamWriter(new FileOutputStream(tempTextFile));
        } else {
            byteStream = new ByteArrayOutputStream();
            writer = new OutputStreamWriter(byteStream);
        }

        try {
            pdfDoc = PDDocument.load(source);
            pts.writeText(pdfDoc, writer);
        } finally {
            try {
                if (pdfDoc != null) {
                    pdfDoc.close();
                }
            } catch (Exception e) {
                log.error("Error closing PDF file: " + e.getMessage(), e);
            }

            try {
                writer.close();
            } catch (Exception e) {
                log.error("Error closing temporary extract file: " + e.getMessage(), e);
            }
        }

        if (useTemporaryFile) {
            return new FileInputStream(tempTextFile);
        } else {
            byte[] bytes = byteStream.toByteArray();
            return new ByteArrayInputStream(bytes);
        }
    } catch (OutOfMemoryError oome) {
        log.error("Error parsing PDF document " + oome.getMessage(), oome);
        if (!ConfigurationManager.getBooleanProperty("pdffilter.skiponmemoryexception", false)) {
            throw oome;
        }
    }

    return null;
}

From source file:org.dspace.app.rest.BitstreamContentRestControllerIT.java

License:BSD License

/**
 * Parses the given bytes as a PDF and returns its text, with the stripper
 * configured to sort text by position.
 *
 * @param content raw bytes of the PDF
 * @return the extracted text
 * @throws IOException if the PDF cannot be parsed or its text written
 */
private String extractPDFText(byte[] content) throws IOException {
    final PDFTextStripper textStripper = new PDFTextStripper();
    textStripper.setSortByPosition(true);

    try (ByteArrayInputStream pdfBytes = new ByteArrayInputStream(content);
            Writer extracted = new StringWriter();
            PDDocument document = PDDocument.load(pdfBytes)) {

        textStripper.writeText(document, extracted);
        final String text = extracted.toString();
        return text;
    }
}

From source file:org.dspace.app.rest.BitstreamContentRestControllerIT.java

License:BSD License

/**
 * Parses the given bytes as a PDF and returns its page count.
 *
 * @param content raw bytes of the PDF
 * @return the number of pages in the document
 * @throws IOException if the PDF cannot be parsed
 */
private int getNumberOfPdfPages(byte[] content) throws IOException {
    final int pageCount;
    try (ByteArrayInputStream pdfBytes = new ByteArrayInputStream(content);
            PDDocument document = PDDocument.load(pdfBytes)) {
        pageCount = document.getNumberOfPages();
    }
    return pageCount;
}

From source file:org.dspace.disseminate.CitationDocument.java

License:BSD License

/**
 * Creates a cited document from the given bitstream. The bitstream's parent
 * object must be an {@code Item}.
 * <p>
 * The process for adding a cover page is as follows:
 * </p>
 * <ol>
 *  <li>Load the source file and create a new document to put the cover
 *      page into.</li>
 *  <li>Create the cover page and add content to it.</li>
 *  <li>Concatenate the cover page and the source document.</li>
 * </ol>
 * <p>
 * NOTE(review): the result is always written to
 * {@code bitstream.cover.pdf} inside {@code tempDir}, so concurrent calls
 * would presumably overwrite each other's output; confirm against callers.
 * </p>
 *
 * @param bitstream The source bitstream being cited. This must be a PDF.
 * @return The temporary File that is the finished, cited document.
 * @throws IOException if the source cannot be read or the result cannot be written
 * @throws SQLException
 * @throws org.dspace.authorize.AuthorizeException
 * @throws COSVisitorException if PDFBox fails while saving the document
 */
public File makeCitedDocument(Bitstream bitstream)
        throws IOException, SQLException, AuthorizeException, COSVisitorException {
    PDDocument document = new PDDocument();
    // Start as null: the former placeholder "new PDDocument()" was overwritten
    // by load() and therefore never closed.
    PDDocument sourceDocument = null;
    try {
        Item item = (Item) bitstream.getParentObject();
        sourceDocument = PDDocument.load(bitstream.retrieve());
        PDPage coverPage = new PDPage(PDPage.PAGE_SIZE_LETTER);
        generateCoverPage(document, coverPage, item);
        addCoverPageToDocument(document, sourceDocument, coverPage);

        String citedPath = tempDir.getAbsolutePath() + "/bitstream.cover.pdf";
        document.save(citedPath);
        return new File(citedPath);
    } finally {
        // Nested finally so a failure closing the source cannot prevent the
        // new document from being closed.
        try {
            if (sourceDocument != null) {
                sourceDocument.close();
            }
        } finally {
            document.close();
        }
    }
}

From source file:org.dspace.disseminate.CitationDocumentServiceImpl.java

License:BSD License

/**
 * Creates a cited copy of the given bitstream: a cover page is generated
 * from the bitstream's parent item and combined with the source PDF.
 * <p>
 * NOTE(review): the result is always written to
 * {@code bitstream.cover.pdf} inside {@code tempDir}, so concurrent calls
 * would presumably overwrite each other's output; confirm against callers.
 * </p>
 *
 * @param context   the context used to resolve and retrieve the bitstream
 * @param bitstream the source bitstream being cited; its parent object must
 *                  be an {@code Item}
 * @return the file holding the finished, cited document
 * @throws IOException if the source cannot be read or the result cannot be written
 * @throws SQLException
 * @throws AuthorizeException
 */
@Override
public File makeCitedDocument(Context context, Bitstream bitstream)
        throws IOException, SQLException, AuthorizeException {
    PDDocument document = new PDDocument();
    // Start as null: the former placeholder "new PDDocument()" was overwritten
    // by load() and therefore never closed.
    PDDocument sourceDocument = null;
    try {
        Item item = (Item) bitstreamService.getParentObject(context, bitstream);
        sourceDocument = PDDocument.load(bitstreamService.retrieve(context, bitstream));
        PDPage coverPage = new PDPage(PDRectangle.LETTER); // TODO: needs to be configurable
        generateCoverPage(context, document, coverPage, item);
        addCoverPageToDocument(document, sourceDocument, coverPage);

        String citedPath = tempDir.getAbsolutePath() + "/bitstream.cover.pdf";
        document.save(citedPath);
        return new File(citedPath);
    } finally {
        // Nested finally so a failure closing the source cannot prevent the
        // new document from being closed.
        try {
            if (sourceDocument != null) {
                sourceDocument.close();
            }
        } finally {
            document.close();
        }
    }
}

From source file:org.ednovo.gooru.application.util.ResourceImageUtil.java

License:Open Source License

/**
 * Moves the uploaded attachment of {@code newResource} into the folder of
 * {@code resource}.
 * <p>
 * Supported document types are handed to {@code convertDoctoPdf}. Any other
 * file is renamed into the resource folder; for PDFs the page count is
 * stored in a new {@code ResourceInfo} first. Thumbnail generation is
 * requested when {@code newResource} has no thumbnail. All exceptions are
 * caught and printed; nothing is propagated to the caller.
 *
 * @param newResource the resource carrying the uploaded attachment
 * @param resource    the resource whose folder receives the attachment
 */
public void moveAttachment(Resource newResource, Resource resource) {
    try {
        File parentFolderFile = new File(
                UserGroupSupport.getUserOrganizationNfsInternalPath() + resource.getFolder());
        if (!parentFolderFile.exists()) {
            parentFolderFile.mkdirs();
        }
        String fileExtension = org.apache.commons.lang.StringUtils
                .substringAfterLast(newResource.getAttach().getMediaFilename(), ".");
        if (BaseUtil.supportedDocument().containsKey(fileExtension)) {
            this.convertDoctoPdf(resource, newResource.getAttach().getMediaFilename(),
                    newResource.getAttach().getFilename());
        } else {
            File file = new File(UserGroupSupport.getUserOrganizationNfsInternalPath()
                    + Constants.UPLOADED_MEDIA_FOLDER + "/" + newResource.getAttach().getMediaFilename());
            if (fileExtension.equalsIgnoreCase(PDF)) {
                PDDocument doc = PDDocument.load(file);
                try {
                    ResourceInfo resourceInfo = new ResourceInfo();
                    resourceInfo.setResource(resource);
                    resourceInfo.setNumOfPages(doc.getNumberOfPages());
                    resourceInfo.setLastUpdated(resource.getLastModified());
                    this.resourceRepository.save(resourceInfo);
                    resource.setResourceInfo(resourceInfo);
                } finally {
                    // Close before the rename below; the document was
                    // previously never closed.
                    doc.close();
                }
            }

            file.renameTo(new File(UserGroupSupport.getUserOrganizationNfsInternalPath() + resource.getFolder()
                    + "/" + newResource.getAttach().getFilename()));
            if (newResource.getThumbnail() == null) {
                this.downloadAndSendMsgToGenerateThumbnails(resource, null);
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}