List of usage examples for org.apache.pdfbox.pdmodel PDDocument load
public static PDDocument load(byte[] input) throws IOException
From source file:nominas.sei.NominasSEI.java
/** * @param args the command line arguments *//*from w ww . j a va 2 s . c o m*/ public static void main(String[] args) { ArrayList<PaginaNomina> paginasNomina = new ArrayList<PaginaNomina>(); for (int x = 0; x < 1; x++) {//RECORREMOS EL ARREGLO CON LOS NOMBRES DE ARCHIVO String ruta = new String();//VARIABLE QUE DETERMINARA LA RUTA DEL ARCHIVO A LEER. ruta = (".\\NOMINAS.pdf"); //SE ALMACENA LA RUTA DEL ARCHIVO A LEER. try { PDDocument pd = PDDocument.load(ruta); //CARGAR EL PDF List l = pd.getDocumentCatalog().getAllPages();//NUMERO LAS PAGINAS DEL ARCHIVO Object[] obj = l.toArray();//METO EN UN OBJETO LA LISTA DE PAGINAS PARA MANIPULARLA for (int i = 0; i < l.size(); i++) { PDPage page = (PDPage) obj[i];//PAGE ES LA PAGINA 1 DE LA QUE CONSTA EL ARCHIVO PageFormat pageFormat = pd.getPageFormat(0);//PROPIEDADES DE LA PAGINA (FORMATO) Double d1 = new Double(pageFormat.getHeight());//ALTO Double d2 = new Double(pageFormat.getWidth());//ANCHO int width = d1.intValue();//ANCHO int eigth = 1024;//ALTO PDFTextStripperByArea stripper = new PDFTextStripperByArea();//COMPONENTE PARA ACCESO AL TEXTO Rectangle rect = new Rectangle(0, 0, width, eigth);//DEFNIR AREA DONDE SE BUSCARA EL TEXTO stripper.addRegion("area1", rect);//REGISTRAMOS LA REGION CON UN NOMBRE stripper.extractRegions(page);//EXTRAE TEXTO DEL AREA String contenido = new String();//CONTENIDO = A LO QUE CONTENGA EL AREA O REGION contenido = (stripper.getTextForRegion("area1")); String[] lines = contenido.split("[\\r\\n]+"); String nombre = lines[1].substring(28, lines[1].length() - 10); PaginaNomina nomina = new PaginaNomina(page, nombre); paginasNomina.add(nomina); } Collections.sort(paginasNomina); // Create a new empty document PDDocument document = new PDDocument(); for (int i = 0; i < paginasNomina.size(); i++) { System.out.println(paginasNomina.get(i).getNombre()); document.addPage(paginasNomina.get(i).getPagina()); } // Save the newly created document document.save("NominasOrdenadas.pdf"); // finally 
make sure that the document is properly // closed. document.close(); pd.close();//CERRAMOS OBJETO ACROBAT } catch (Exception e) { System.out.println(e.getMessage()); } //CATCH } //FOR }
From source file:noprint.NoPrint.java
/** * @param args the command line arguments * @throws IOException in case input file is can't be read or output written * @throws org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException * @throws org.apache.pdfbox.exceptions.COSVisitorException */// w w w. j a v a2 s .c om public static void main(String[] args) throws IOException, BadSecurityHandlerException, COSVisitorException { String infile = "input.pdf"; String outfile = "output.pdf"; String ownerPass = ""; String userPass = ""; /** * TODO: read up what the actual difference is between * userpassword and ownerpassword. */ int keylength = 40; AccessPermission ap = new AccessPermission(); PDDocument document = null; ap.setCanAssembleDocument(true); ap.setCanExtractContent(true); ap.setCanExtractForAccessibility(true); ap.setCanFillInForm(true); ap.setCanModify(true); ap.setCanModifyAnnotations(true); ap.setCanPrintDegraded(true); ap.setCanPrint(false); // YOU CAN'T PRINT // at least not when your PDFreader adheres to DRM (some don't) // also this is trivial to remove document = PDDocument.load(infile); if (!document.isEncrypted()) { StandardProtectionPolicy spp; spp = new StandardProtectionPolicy(ownerPass, userPass, ap); spp.setEncryptionKeyLength(keylength); document.protect(spp); document.save(outfile); } if (document != null) { document.close(); } }
From source file:nz.co.testamation.core.reader.pdf.PdfContentReaderImpl.java
License:Apache License
private String getPdfText(CloseableHttpResponse response) throws IOException { PDDocument load = PDDocument.load(response.getEntity().getContent()); try {//from w w w . jav a 2 s . c om return new PDFTextStripper().getText(load).replaceAll("\\s+", " "); } finally { load.close(); } }
From source file:org.ala.harvester.ExtractPubfSciNamesAndImages.java
License:Apache License
/**
 * Loads the PDF named by the single command line argument, decrypts it with
 * an empty password when it is encrypted, and hands it to
 * {@code extractSciNameAndImages}. Calls {@code usage()} when the argument
 * count is not exactly one.
 *
 * @param args The command line arguments.
 *
 * @throws Exception If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        usage();
        return;
    }

    PDDocument document = PDDocument.load(args[0]);
    try {
        if (document.isEncrypted()) {
            try {
                document.decrypt("");
            } catch (InvalidPasswordException e) {
                System.err.println("Error: Document is encrypted with a password.");
                System.exit(1);
            }
        }
        extractSciNameAndImages(document);
    } finally {
        document.close();
    }
}
From source file:org.alfresco.extension.pdftoolkit.repo.action.executer.PDFAppendActionExecuter.java
License:Apache License
/**
 * Appends the PDF read from {@code reader} to the end of the PDF read from
 * {@code targetContentReader}, saves the result into a temporary directory and
 * stores each file found there as a new destination node in the repository.
 *
 * @param ruleAction          the action; supplies {@code PARAM_DESTINATION_FOLDER}
 * @param actionedUponNodeRef the node acted upon; its id names the temporary directory
 * @param targetNodeRef       not used by this method
 * @param reader              reader for the PDF whose pages are appended
 * @param targetContentReader reader for the PDF that receives the appended pages
 * @param options             must contain {@code PARAM_DESTINATION_NAME}
 * @throws AlfrescoRuntimeException if reading, merging, saving or closing fails
 */
protected final void action(Action ruleAction, NodeRef actionedUponNodeRef, NodeRef targetNodeRef,
        ContentReader reader, ContentReader targetContentReader, Map<String, Object> options) {
    PDDocument pdf = null;
    PDDocument pdfTarget = null;
    InputStream is = null;
    InputStream tis = null;
    File tempDir = null;

    try {
        is = reader.getContentInputStream();
        tis = targetContentReader.getContentInputStream();

        // Stream both documents in.
        pdf = PDDocument.load(is);
        pdfTarget = PDDocument.load(tis);

        // Append the PDFs.
        PDFMergerUtility merger = new PDFMergerUtility();
        merger.appendDocument(pdfTarget, pdf);
        String fileName = options.get(PARAM_DESTINATION_NAME).toString();
        // NOTE(review): no sources are added to the merger, so these two calls
        // look redundant next to appendDocument(); kept to preserve behaviour.
        merger.setDestinationFileName(fileName);
        merger.mergeDocuments();

        // Build a temp dir name based on the id of the node ref being imported.
        File alfTempDir = TempFileProvider.getTempDir();
        tempDir = new File(alfTempDir.getPath() + File.separatorChar + actionedUponNodeRef.getId());
        tempDir.mkdir();

        pdfTarget.save(tempDir + "" + File.separatorChar + fileName + FILE_EXTENSION);

        // listFiles() returns null when the directory cannot be read.
        File[] generated = tempDir.listFiles();
        if (generated != null) {
            for (File file : generated) {
                try {
                    if (file.isFile()) {
                        // Get a writer and prep it for putting the file back into the repo.
                        NodeRef destinationNode = createDestinationNode(file.getName(),
                                (NodeRef) ruleAction.getParameterValue(PARAM_DESTINATION_FOLDER),
                                actionedUponNodeRef);
                        ContentWriter writer = serviceRegistry.getContentService()
                                .getWriter(destinationNode, ContentModel.PROP_CONTENT, true);
                        writer.setEncoding(reader.getEncoding()); // original encoding
                        writer.setMimetype(FILE_MIMETYPE);

                        // Put it in the repo.
                        writer.putContent(file);

                        // Clean up.
                        file.delete();
                    }
                } catch (FileExistsException e) {
                    throw new AlfrescoRuntimeException("Failed to process file.", e);
                }
            }
        }
    } catch (COSVisitorException e) {
        throw new AlfrescoRuntimeException(e.getMessage(), e);
    } catch (IOException e) {
        throw new AlfrescoRuntimeException(e.getMessage(), e);
    } finally {
        // Attempt every close (including the target stream) before reporting
        // the first failure, so one bad close cannot leak the other resources.
        IOException closeFailure = null;
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                closeFailure = e;
            }
        }
        if (pdfTarget != null) {
            try {
                pdfTarget.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (tis != null) {
            try {
                tis.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (tempDir != null) {
            tempDir.delete();
        }
        if (closeFailure != null) {
            throw new AlfrescoRuntimeException(closeFailure.getMessage(), closeFailure);
        }
    }
}
From source file:org.alfresco.extension.pdftoolkit.repo.action.executer.PDFInsertAtPageActionExecuter.java
License:Apache License
/** * @param reader//from ww w . j ava2s . c o m * @param writer * @param options * @throws Exception */ protected final void action(Action ruleAction, NodeRef actionedUponNodeRef, ContentReader reader, ContentReader insertReader, Map<String, Object> options) { PDDocument pdf = null; PDDocument insertContentPDF = null; InputStream is = null; InputStream cis = null; File tempDir = null; ContentWriter writer = null; try { int insertAt = Integer.valueOf((String) options.get(PARAM_INSERT_AT_PAGE)).intValue(); // Get contentReader inputStream is = reader.getContentInputStream(); // Get insertContentReader inputStream cis = insertReader.getContentInputStream(); // stream the target document in pdf = PDDocument.load(is); // stream the insert content document in insertContentPDF = PDDocument.load(cis); // split the PDF and put the pages in a list Splitter splitter = new Splitter(); // Need to adjust the input value to get the split at the right page splitter.setSplitAtPage(insertAt - 1); // Split the pages List<PDDocument> pdfs = splitter.split(pdf); // Build the output PDF PDFMergerUtility merger = new PDFMergerUtility(); merger.appendDocument((PDDocument) pdfs.get(0), insertContentPDF); merger.appendDocument((PDDocument) pdfs.get(0), (PDDocument) pdfs.get(1)); merger.setDestinationFileName(options.get(PARAM_DESTINATION_NAME).toString()); merger.mergeDocuments(); // build a temp dir, name based on the ID of the noderef we are // importing File alfTempDir = TempFileProvider.getTempDir(); tempDir = new File(alfTempDir.getPath() + File.separatorChar + actionedUponNodeRef.getId()); tempDir.mkdir(); String fileName = options.get(PARAM_DESTINATION_NAME).toString(); PDDocument completePDF = (PDDocument) pdfs.get(0); completePDF.save(tempDir + "" + File.separatorChar + fileName + FILE_EXTENSION); try { completePDF.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } for (File file : tempDir.listFiles()) { try { if (file.isFile()) { // Get a 
writer and prep it for putting it back into the // repo NodeRef destinationNode = createDestinationNode(file.getName(), (NodeRef) ruleAction.getParameterValue(PARAM_DESTINATION_FOLDER), actionedUponNodeRef); writer = serviceRegistry.getContentService().getWriter(destinationNode, ContentModel.PROP_CONTENT, true); writer.setEncoding(reader.getEncoding()); // original // encoding writer.setMimetype(FILE_MIMETYPE); // Put it in the repo writer.putContent(file); // Clean up file.delete(); } } catch (FileExistsException e) { throw new AlfrescoRuntimeException("Failed to process file.", e); } } } // TODO add better handling catch (COSVisitorException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } finally { if (pdf != null) { try { pdf.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } } if (is != null) { try { is.close(); } catch (IOException e) { throw new AlfrescoRuntimeException(e.getMessage(), e); } } if (tempDir != null) { tempDir.delete(); } } }
From source file:org.alfresco.repo.content.transform.TextToPdfContentTransformerTest.java
License:Open Source License
private void transformTextAndCheck(String text, String encoding, String checkText) throws IOException { // Get a reader for the text ContentReader reader = buildContentReader(text, Charset.forName(encoding)); // And a temp writer File out = TempFileProvider.createTempFile("AlfrescoTest_", ".pdf"); ContentWriter writer = new FileContentWriter(out); writer.setMimetype("application/pdf"); // Transform to PDF transformer.transform(reader, writer); // Read back in the PDF and check it PDDocument doc = PDDocument.load(out); PDFTextStripper textStripper = new PDFTextStripper(); StringWriter textWriter = new StringWriter(); textStripper.writeText(doc, textWriter); doc.close();/*ww w .j a v a 2 s. c o m*/ String roundTrip = clean(textWriter.toString()); assertEquals("Incorrect text in PDF when starting from text in " + encoding, checkText, roundTrip); }
From source file:org.apache.camel.component.fop.FopComponentTest.java
License:Apache License
@Test public void createPdfUsingXmlDataAndXsltTransformation() throws Exception { resultEndpoint.expectedMessageCount(1); FileInputStream inputStream = new FileInputStream("src/test/data/xml/data.xml"); template.sendBody(inputStream);//from w w w. ja v a 2s. c o m resultEndpoint.assertIsSatisfied(); PDDocument document = PDDocument.load("target/data/result.pdf"); String pdfText = FopHelper.extractTextFrom(document); assertTrue(pdfText.contains("Project")); //from xsl template assertTrue(pdfText.contains("John Doe")); //from data xml // assert on the header "foo" being populated Exchange exchange = resultEndpoint.getReceivedExchanges().get(0); assertEquals("Header value is lost!", "bar", exchange.getIn().getHeader("foo")); }
From source file:org.apache.camel.component.fop.FopEndpointTest.java
License:Apache License
/**
 * Loads a PDF document from the OUT message body of the exchange, read as an
 * {@link InputStream}.
 *
 * @param exchange the exchange whose OUT body holds the PDF
 * @return the loaded document; the caller is responsible for closing it
 * @throws IOException if the body cannot be parsed as a PDF
 */
private PDDocument getDocumentFrom(Exchange exchange) throws IOException {
    return PDDocument.load(exchange.getOut().getBody(InputStream.class));
}
From source file:org.apache.camel.component.pdf.PdfAppendTest.java
License:Apache License
@Test public void testAppend() throws Exception { final String originalText = "Test"; final String textToAppend = "Append"; PDDocument document = new PDDocument(); PDPage page = new PDPage(PDPage.PAGE_SIZE_A4); document.addPage(page);// www .j av a2s . co m PDPageContentStream contentStream = new PDPageContentStream(document, page); contentStream.setFont(PDType1Font.HELVETICA, 12); contentStream.beginText(); contentStream.moveTextPositionByAmount(20, 400); contentStream.drawString(originalText); contentStream.endText(); contentStream.close(); template.sendBodyAndHeader("direct:start", textToAppend, PdfHeaderConstants.PDF_DOCUMENT_HEADER_NAME, document); resultEndpoint.setExpectedMessageCount(1); resultEndpoint.expectedMessagesMatches(new Predicate() { @Override public boolean matches(Exchange exchange) { Object body = exchange.getIn().getBody(); assertThat(body, instanceOf(ByteArrayOutputStream.class)); try { PDDocument doc = PDDocument .load(new ByteArrayInputStream(((ByteArrayOutputStream) body).toByteArray())); PDFTextStripper pdfTextStripper = new PDFTextStripper(); String text = pdfTextStripper.getText(doc); assertEquals(2, doc.getNumberOfPages()); assertThat(text, containsString(originalText)); assertThat(text, containsString(textToAppend)); } catch (IOException e) { throw new RuntimeException(e); } return true; } }); resultEndpoint.assertIsSatisfied(); }