List of usage examples for org.apache.pdfbox.pdmodel PDDocument load
public static PDDocument load(byte[] input) throws IOException
From source file:org.deepfs.fsml.xdcr.PDFTransducer.java
@Override public String read(final String path) { PDDocument pd = null;//from w w w . j a v a 2 s . c o m final StringWriter sw = new StringWriter(); final StringBuilder sb = new StringBuilder(128); try { pd = PDDocument.load(path); PDDocumentInformation info = pd.getDocumentInformation(); PDFTextStripper stripper = new PDFTextStripper(); stripper.setEndPage(NO_PAGES); stripper.writeText(pd, sw); sb.append(keyValue(NS + "title", info.getTitle())); sb.append(keyValue(NS + "subject", info.getSubject())); sb.append(keyValue(NS + "creator", info.getCreator())); sb.append(keyValue(NS + "author", info.getAuthor())); sb.append(keyValue(NS + "producer", info.getProducer())); sb.append(keyValue(NS + "date", info.getCreationDate() != null ? new SimpleDateFormat().format(info.getCreationDate().getTime()) : null)); sb.append(keyValue(NS + "content", sw.getBuffer().toString())); sb.append(keyValue(NS + "keywords", info.getKeywords())); } catch (IOException e) { e.printStackTrace(); } catch (IndexOutOfBoundsException oe) { // [MS] thanks apache pdfbox :-) System.err.println(oe.getMessage()); } return sb.toString(); }
From source file:org.deidentifier.arx.certificate.ARXCertificate.java
License:Apache License
/** * Renders the document into the given output stream * /*from ww w . j a v a 2 s.c om*/ * @param stream * @throws IOException */ public void save(OutputStream stream) throws IOException { // Render Document document = new Document(style.gethMargin(), style.gethMargin(), style.getvMargin(), style.getvMargin()); for (Element element : this.elements) { element.render(document, 0, this.style); } // Save to temp file File tmp = File.createTempFile("arx", "certificate"); document.save(tmp); // Load and watermark PDDocument pdDocument = PDDocument.load(tmp); Watermark watermark = new Watermark(pdDocument); watermark.mark(pdDocument); // Save pdDocument.save(stream); pdDocument.close(); tmp.delete(); }
From source file:org.deidentifier.arx.certificate.resources.Watermark.java
License:Apache License
/**
 * Creates a new instance by loading the {@code watermark.pdf} resource
 * that is resolved relative to this class.
 *
 * @param document not read by this constructor
 * @throws IOException if the resource cannot be found or cannot be loaded
 */
public Watermark(PDDocument document) throws IOException {
    java.io.InputStream resource = Watermark.class.getResourceAsStream("watermark.pdf");
    if (resource == null) {
        // getResourceAsStream signals a missing resource with null.
        throw new IOException("Watermark resource not found: watermark.pdf");
    }
    try {
        this.watermark = PDDocument.load(resource);
    } finally {
        // The stream is only needed while loading; release it afterwards.
        resource.close();
    }
}
From source file:org.dspace.app.mediafilter.PDFBoxThumbnail.java
License:BSD License
/** * @param source/*from www . ja va 2 s. c o m*/ * source input stream * * @return InputStream the resulting input stream */ @Override public InputStream getDestinationStream(Item currentItem, InputStream source, boolean verbose) throws Exception { PDDocument doc = PDDocument.load(source); PDFRenderer renderer = new PDFRenderer(doc); BufferedImage buf = renderer.renderImage(0); // ImageIO.write(buf, "PNG", new File("custom-render.png")); doc.close(); JPEGFilter jpegFilter = new JPEGFilter(); return jpegFilter.getThumb(currentItem, buf, verbose); }
From source file:org.dspace.app.mediafilter.PDFFilter.java
License:BSD License
/** * @param source/* ww w . j a v a 2s . c o m*/ * source input stream * * @return InputStream the resulting input stream */ public InputStream getDestinationStream(InputStream source) throws Exception { try { boolean useTemporaryFile = ConfigurationManager.getBooleanProperty("pdffilter.largepdfs", false); // get input stream from bitstream // pass to filter, get string back PDFTextStripper pts = new PDFTextStripper(); PDDocument pdfDoc = null; Writer writer = null; File tempTextFile = null; ByteArrayOutputStream byteStream = null; if (useTemporaryFile) { tempTextFile = File.createTempFile("dspacepdfextract" + source.hashCode(), ".txt"); tempTextFile.deleteOnExit(); writer = new OutputStreamWriter(new FileOutputStream(tempTextFile)); } else { byteStream = new ByteArrayOutputStream(); writer = new OutputStreamWriter(byteStream); } try { pdfDoc = PDDocument.load(source); pts.writeText(pdfDoc, writer); } finally { try { if (pdfDoc != null) { pdfDoc.close(); } } catch (Exception e) { log.error("Error closing PDF file: " + e.getMessage(), e); } try { writer.close(); } catch (Exception e) { log.error("Error closing temporary extract file: " + e.getMessage(), e); } } if (useTemporaryFile) { return new FileInputStream(tempTextFile); } else { byte[] bytes = byteStream.toByteArray(); return new ByteArrayInputStream(bytes); } } catch (OutOfMemoryError oome) { log.error("Error parsing PDF document " + oome.getMessage(), oome); if (!ConfigurationManager.getBooleanProperty("pdffilter.skiponmemoryexception", false)) { throw oome; } } return null; }
From source file:org.dspace.app.rest.BitstreamContentRestControllerIT.java
License:BSD License
private String extractPDFText(byte[] content) throws IOException { PDFTextStripper pts = new PDFTextStripper(); pts.setSortByPosition(true);//from www. j a v a 2 s . c om try (ByteArrayInputStream source = new ByteArrayInputStream(content); Writer writer = new StringWriter(); PDDocument pdfDoc = PDDocument.load(source)) { pts.writeText(pdfDoc, writer); return writer.toString(); } }
From source file:org.dspace.app.rest.BitstreamContentRestControllerIT.java
License:BSD License
private int getNumberOfPdfPages(byte[] content) throws IOException { try (ByteArrayInputStream source = new ByteArrayInputStream(content); PDDocument pdfDoc = PDDocument.load(source)) { return pdfDoc.getNumberOfPages(); }//w w w. j a v a2 s. co m }
From source file:org.dspace.disseminate.CitationDocument.java
License:BSD License
/**
 * Creates a cited document from the given bitstream. The bitstream's
 * parent object is cast to {@code Item} and used to generate the cover page.
 * <p>
 * The process for adding a cover page is as follows:
 * </p>
 * <ol>
 *  <li>Load the source PDF from the bitstream and create a new, empty
 *      document to hold the result.</li>
 *  <li>Create a letter-size cover page and add content to it.</li>
 *  <li>Combine the cover page and the source document.</li>
 *  <li>Save the result as {@code bitstream.cover.pdf} in the temporary
 *      directory.</li>
 * </ol>
 * <p>
 * NOTE(review): the output path is the same for every call, so successive
 * calls overwrite the same file.
 * </p>
 *
 * @param bitstream The source bitstream being cited. This must be a PDF.
 * @return The temporary File that is the finished, cited document.
 * @throws IOException if the source cannot be loaded or the result saved
 * @throws SQLException declared by the bitstream access calls
 * @throws AuthorizeException declared by the bitstream access calls
 * @throws COSVisitorException declared by the PDF save call
 */
public File makeCitedDocument(Bitstream bitstream)
        throws IOException, SQLException, AuthorizeException, COSVisitorException {
    PDDocument document = new PDDocument();
    // Starts as null: no placeholder document is created just to be
    // overwritten (and leaked) by load().
    PDDocument sourceDocument = null;
    try {
        Item item = (Item) bitstream.getParentObject();
        sourceDocument = PDDocument.load(bitstream.retrieve());

        PDPage coverPage = new PDPage(PDPage.PAGE_SIZE_LETTER);
        generateCoverPage(document, coverPage, item);
        addCoverPageToDocument(document, sourceDocument, coverPage);

        String citedPath = tempDir.getAbsolutePath() + "/bitstream.cover.pdf";
        document.save(citedPath);
        return new File(citedPath);
    } finally {
        // Nested so a failure closing the source cannot skip closing the cover.
        try {
            if (sourceDocument != null) {
                sourceDocument.close();
            }
        } finally {
            document.close();
        }
    }
}
From source file:org.dspace.disseminate.CitationDocumentServiceImpl.java
License:BSD License
@Override public File makeCitedDocument(Context context, Bitstream bitstream) throws IOException, SQLException, AuthorizeException { PDDocument document = new PDDocument(); PDDocument sourceDocument = new PDDocument(); try {// ww w. ja va 2 s. c o m Item item = (Item) bitstreamService.getParentObject(context, bitstream); sourceDocument = sourceDocument.load(bitstreamService.retrieve(context, bitstream)); PDPage coverPage = new PDPage(PDRectangle.LETTER); // TODO: needs to be configurable generateCoverPage(context, document, coverPage, item); addCoverPageToDocument(document, sourceDocument, coverPage); document.save(tempDir.getAbsolutePath() + "/bitstream.cover.pdf"); return new File(tempDir.getAbsolutePath() + "/bitstream.cover.pdf"); } finally { sourceDocument.close(); document.close(); } }
From source file:org.ednovo.gooru.application.util.ResourceImageUtil.java
License:Open Source License
public void moveAttachment(Resource newResource, Resource resource) { try {// w w w . j a v a 2 s.co m File parentFolderFile = new File( UserGroupSupport.getUserOrganizationNfsInternalPath() + resource.getFolder()); if (!parentFolderFile.exists()) { parentFolderFile.mkdirs(); } String fileExtension = org.apache.commons.lang.StringUtils .substringAfterLast(newResource.getAttach().getMediaFilename(), "."); if (BaseUtil.supportedDocument().containsKey(fileExtension)) { this.convertDoctoPdf(resource, newResource.getAttach().getMediaFilename(), newResource.getAttach().getFilename()); } else { File file = new File(UserGroupSupport.getUserOrganizationNfsInternalPath() + Constants.UPLOADED_MEDIA_FOLDER + "/" + newResource.getAttach().getMediaFilename()); if (fileExtension.equalsIgnoreCase(PDF)) { PDDocument doc = PDDocument.load(file); ResourceInfo resourceInfo = new ResourceInfo(); resourceInfo.setResource(resource); resourceInfo.setNumOfPages(doc.getNumberOfPages()); resourceInfo.setLastUpdated(resource.getLastModified()); this.resourceRepository.save(resourceInfo); resource.setResourceInfo(resourceInfo); } file.renameTo(new File(UserGroupSupport.getUserOrganizationNfsInternalPath() + resource.getFolder() + "/" + newResource.getAttach().getFilename())); if (newResource.getThumbnail() == null) { this.downloadAndSendMsgToGenerateThumbnails(resource, null); } } } catch (Exception e) { e.printStackTrace(); } }