Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find usage examples for the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:nominas.sei.NominasSEI.java

/**
 * Loads {@code NOMINAS.pdf} from the working directory, derives a name from the
 * second text line of every page, sorts the pages with {@link Collections#sort}
 * and saves them, in that order, as {@code NominasOrdenadas.pdf}.
 * <p>
 * Any failure is reported on standard output. Both documents are closed whether
 * or not processing succeeded.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    final String ruta = ".\\NOMINAS.pdf"; // path of the PDF to read
    final int regionHeight = 1024; // fixed height of the text-extraction region
    final int nameStart = 28; // offset of the name inside the second text line
    final int nameTail = 10; // trailing characters of that line that are not part of the name

    List<PaginaNomina> paginasNomina = new ArrayList<PaginaNomina>();
    PDDocument pd = null;
    PDDocument document = null;

    try {
        pd = PDDocument.load(ruta);
        List<?> pages = pd.getDocumentCatalog().getAllPages();

        for (int i = 0; i < pages.size(); i++) {
            PDPage page = (PDPage) pages.get(i);

            // Use the format of the page being processed (not always page 0) and
            // its width (not its height) as the width of the extraction region.
            PageFormat pageFormat = pd.getPageFormat(i);
            int width = (int) pageFormat.getWidth();

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.addRegion("area1", new Rectangle(0, 0, width, regionHeight));
            stripper.extractRegions(page);

            String contenido = stripper.getTextForRegion("area1");
            String[] lines = contenido.split("[\\r\\n]+");
            if (lines.length < 2 || lines[1].length() < nameStart + nameTail) {
                // Fail with a descriptive message instead of an index error.
                throw new IllegalStateException("Cannot read the name on page " + (i + 1));
            }
            String nombre = lines[1].substring(nameStart, lines[1].length() - nameTail);
            paginasNomina.add(new PaginaNomina(page, nombre));
        }

        // Ordering is whatever PaginaNomina's natural ordering defines.
        Collections.sort(paginasNomina);

        // Build the output document from the sorted pages.
        document = new PDDocument();
        for (PaginaNomina nomina : paginasNomina) {
            System.out.println(nomina.getNombre());
            document.addPage(nomina.getPagina());
        }
        document.save("NominasOrdenadas.pdf");
    } catch (Exception e) {
        System.out.println(e.getMessage());
    } finally {
        // Close the output first, then the source whose pages it borrows;
        // a failing close must not prevent the other one from being attempted.
        if (document != null) {
            try {
                document.close();
            } catch (Exception e) {
                System.out.println(e.getMessage());
            }
        }
        if (pd != null) {
            try {
                pd.close();
            } catch (Exception e) {
                System.out.println(e.getMessage());
            }
        }
    }
}

From source file:noprint.NoPrint.java

/**
 * Loads {@code input.pdf} and, when it is not already encrypted, writes a copy
 * to {@code output.pdf} protected by a policy that grants every permission set
 * below except printing.
 *
 * @param args the command line arguments (not used)
 * @throws IOException in case the input file can't be read or the output written
 * @throws org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException
 * @throws org.apache.pdfbox.exceptions.COSVisitorException
 */
public static void main(String[] args) throws IOException, BadSecurityHandlerException, COSVisitorException {
    String infile = "input.pdf";
    String outfile = "output.pdf";

    String ownerPass = "";
    String userPass = "";
    /**
     * TODO: read up what the actual difference is between
     * userpassword and ownerpassword.
     */
    int keylength = 40;

    AccessPermission ap = new AccessPermission();

    ap.setCanAssembleDocument(true);
    ap.setCanExtractContent(true);
    ap.setCanExtractForAccessibility(true);
    ap.setCanFillInForm(true);
    ap.setCanModify(true);
    ap.setCanModifyAnnotations(true);
    ap.setCanPrintDegraded(true);

    ap.setCanPrint(false);
    // YOU CAN'T PRINT
    // at least not when your PDFreader adheres to DRM (some don't)
    // also this is trivial to remove

    PDDocument document = PDDocument.load(infile);
    try {
        if (!document.isEncrypted()) {
            StandardProtectionPolicy spp = new StandardProtectionPolicy(ownerPass, userPass, ap);
            spp.setEncryptionKeyLength(keylength);
            document.protect(spp);
            document.save(outfile);
        }
    } finally {
        // Close the document even when protecting or saving fails.
        document.close();
    }
}

From source file:nz.co.testamation.core.reader.pdf.PdfContentReaderImpl.java

License:Apache License

/**
 * Reads the response body as a PDF and returns its text with every run of
 * whitespace collapsed into a single space.
 *
 * @param response HTTP response whose entity content is parsed as a PDF
 * @return the extracted, whitespace-normalised text
 * @throws IOException if the body cannot be read or parsed
 */
private String getPdfText(CloseableHttpResponse response) throws IOException {
    final PDDocument pdf = PDDocument.load(response.getEntity().getContent());
    try {
        final String rawText = new PDFTextStripper().getText(pdf);
        return rawText.replaceAll("\\s+", " ");
    } finally {
        // The document is closed whether or not extraction succeeded.
        pdf.close();
    }
}

From source file:org.ala.harvester.ExtractPubfSciNamesAndImages.java

License:Apache License

/**
 * Loads the PDF named by the single command line argument, decrypts it with an
 * empty password when it is encrypted, and hands it to
 * {@code extractSciNameAndImages}. Prints the usage text when the argument
 * count is not exactly one.
 *
 * @param args The command line arguments.
 *
 * @throws Exception If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        usage();
        return;
    }

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(args[0]);

        if (pdf.isEncrypted()) {
            try {
                pdf.decrypt("");
            } catch (InvalidPasswordException e) {
                // An empty password was not enough: report and terminate.
                System.err.println("Error: Document is encrypted with a password.");
                System.exit(1);
            }
        }

        extractSciNameAndImages(pdf);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.alfresco.extension.pdftoolkit.repo.action.executer.PDFAppendActionExecuter.java

License:Apache License

/**
 * Appends the PDF read from {@code reader} to the PDF read from
 * {@code targetContentReader}, saves the result into a temporary directory
 * named after {@code actionedUponNodeRef}, and writes every file found in that
 * directory back into the repository through a content writer.
 *
 * @param ruleAction          action supplying the {@code PARAM_DESTINATION_FOLDER} parameter
 * @param actionedUponNodeRef node whose id names the temporary directory
 * @param targetNodeRef       not used by this method
 * @param reader              content of the PDF that is appended
 * @param targetContentReader content of the PDF that is appended to
 * @param options             must contain {@code PARAM_DESTINATION_NAME}
 * @throws AlfrescoRuntimeException if reading, merging, saving or closing fails
 */
protected final void action(Action ruleAction, NodeRef actionedUponNodeRef, NodeRef targetNodeRef,
        ContentReader reader, ContentReader targetContentReader, Map<String, Object> options) {
    PDDocument pdf = null;
    PDDocument pdfTarget = null;
    InputStream is = null;
    InputStream tis = null;
    File tempDir = null;

    try {
        is = reader.getContentInputStream();
        tis = targetContentReader.getContentInputStream();
        // stream the documents in
        pdf = PDDocument.load(is);
        pdfTarget = PDDocument.load(tis);

        // Append the PDFs
        PDFMergerUtility merger = new PDFMergerUtility();
        merger.appendDocument(pdfTarget, pdf);
        String fileName = options.get(PARAM_DESTINATION_NAME).toString();
        // NOTE(review): no sources are added to the merger in this method; these
        // two calls are kept exactly as the original made them.
        merger.setDestinationFileName(fileName);
        merger.mergeDocuments();

        // build a temp dir name based on the ID of the noderef we are importing
        File alfTempDir = TempFileProvider.getTempDir();
        tempDir = new File(alfTempDir.getPath() + File.separatorChar + actionedUponNodeRef.getId());
        tempDir.mkdir();

        pdfTarget.save(tempDir + "" + File.separatorChar + fileName + FILE_EXTENSION);

        for (File file : tempDir.listFiles()) {
            try {
                if (file.isFile()) {
                    // Get a writer and prep it for putting it back into the repo
                    NodeRef destinationNode = createDestinationNode(file.getName(),
                            (NodeRef) ruleAction.getParameterValue(PARAM_DESTINATION_FOLDER),
                            actionedUponNodeRef);
                    ContentWriter writer = serviceRegistry.getContentService().getWriter(destinationNode,
                            ContentModel.PROP_CONTENT, true);

                    writer.setEncoding(reader.getEncoding()); // original encoding
                    writer.setMimetype(FILE_MIMETYPE);

                    // Put it in the repo
                    writer.putContent(file);

                    // Clean up
                    file.delete();
                }
            } catch (FileExistsException e) {
                throw new AlfrescoRuntimeException("Failed to process file.", e);
            }
        }
    } catch (COSVisitorException e) {
        throw new AlfrescoRuntimeException(e.getMessage(), e);
    } catch (IOException e) {
        throw new AlfrescoRuntimeException(e.getMessage(), e);
    } finally {
        // Attempt every cleanup step even if an earlier one fails; the first
        // close failure is reported only after all of them have been tried.
        IOException closeFailure = null;
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                closeFailure = e;
            }
        }
        if (pdfTarget != null) {
            try {
                pdfTarget.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        // The target stream was opened above as well and must be released too.
        if (tis != null) {
            try {
                tis.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }

        if (tempDir != null) {
            tempDir.delete();
        }

        if (closeFailure != null) {
            throw new AlfrescoRuntimeException(closeFailure.getMessage(), closeFailure);
        }
    }
}

From source file:org.alfresco.extension.pdftoolkit.repo.action.executer.PDFInsertAtPageActionExecuter.java

License:Apache License

/**
 * Inserts the PDF read from {@code insertReader} into the PDF read from
 * {@code reader} in front of the page number given by
 * {@code PARAM_INSERT_AT_PAGE}, saves the result into a temporary directory
 * named after {@code actionedUponNodeRef}, and writes every file found in that
 * directory back into the repository through a content writer.
 *
 * @param ruleAction          action supplying the {@code PARAM_DESTINATION_FOLDER} parameter
 * @param actionedUponNodeRef node whose id names the temporary directory
 * @param reader              content of the PDF that receives the inserted pages
 * @param insertReader        content of the PDF whose pages are inserted
 * @param options             must contain {@code PARAM_INSERT_AT_PAGE} (a String holding
 *                            an integer) and {@code PARAM_DESTINATION_NAME}
 * @throws AlfrescoRuntimeException if reading, splitting, merging, saving or closing fails
 */
protected final void action(Action ruleAction, NodeRef actionedUponNodeRef, ContentReader reader,
        ContentReader insertReader, Map<String, Object> options) {
    PDDocument pdf = null;
    PDDocument insertContentPDF = null;
    List<PDDocument> pdfs = null;
    InputStream is = null;
    InputStream cis = null;
    File tempDir = null;

    try {
        int insertAt = Integer.parseInt((String) options.get(PARAM_INSERT_AT_PAGE));

        // Get contentReader inputStream
        is = reader.getContentInputStream();
        // Get insertContentReader inputStream
        cis = insertReader.getContentInputStream();
        // stream the target document in
        pdf = PDDocument.load(is);
        // stream the insert content document in
        insertContentPDF = PDDocument.load(cis);

        // Split the target into chunks of (insertAt - 1) pages each, so the
        // first chunk holds exactly the pages that precede the insertion point.
        Splitter splitter = new Splitter();
        splitter.setSplitAtPage(insertAt - 1);
        pdfs = splitter.split(pdf);

        // Build the output PDF: first chunk, inserted content, then EVERY
        // remaining chunk. The splitter yields more than two chunks whenever the
        // document has more than 2 * (insertAt - 1) pages; appending only the
        // second chunk would drop the pages held by the later ones.
        PDDocument completePDF = pdfs.get(0);
        PDFMergerUtility merger = new PDFMergerUtility();
        merger.appendDocument(completePDF, insertContentPDF);
        for (int i = 1; i < pdfs.size(); i++) {
            merger.appendDocument(completePDF, pdfs.get(i));
        }
        String fileName = options.get(PARAM_DESTINATION_NAME).toString();
        // NOTE(review): no sources are added to the merger in this method; these
        // two calls are kept exactly as the original made them.
        merger.setDestinationFileName(fileName);
        merger.mergeDocuments();

        // build a temp dir, name based on the ID of the noderef we are importing
        File alfTempDir = TempFileProvider.getTempDir();
        tempDir = new File(alfTempDir.getPath() + File.separatorChar + actionedUponNodeRef.getId());
        tempDir.mkdir();

        completePDF.save(tempDir + "" + File.separatorChar + fileName + FILE_EXTENSION);

        for (File file : tempDir.listFiles()) {
            try {
                if (file.isFile()) {
                    // Get a writer and prep it for putting it back into the repo
                    NodeRef destinationNode = createDestinationNode(file.getName(),
                            (NodeRef) ruleAction.getParameterValue(PARAM_DESTINATION_FOLDER),
                            actionedUponNodeRef);
                    ContentWriter writer = serviceRegistry.getContentService().getWriter(destinationNode,
                            ContentModel.PROP_CONTENT, true);

                    writer.setEncoding(reader.getEncoding()); // original encoding
                    writer.setMimetype(FILE_MIMETYPE);

                    // Put it in the repo
                    writer.putContent(file);

                    // Clean up
                    file.delete();
                }
            } catch (FileExistsException e) {
                throw new AlfrescoRuntimeException("Failed to process file.", e);
            }
        }
    }
    // TODO add better handling
    catch (COSVisitorException e) {
        throw new AlfrescoRuntimeException(e.getMessage(), e);
    } catch (IOException e) {
        throw new AlfrescoRuntimeException(e.getMessage(), e);
    } finally {
        // Attempt every cleanup step even if an earlier one fails; the first
        // close failure is reported only after all of them have been tried.
        IOException closeFailure = null;
        if (pdfs != null) {
            // Every chunk produced by the splitter is its own document.
            for (PDDocument part : pdfs) {
                try {
                    part.close();
                } catch (IOException e) {
                    if (closeFailure == null) {
                        closeFailure = e;
                    }
                }
            }
        }
        if (insertContentPDF != null) {
            try {
                insertContentPDF.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (cis != null) {
            try {
                cis.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }

        if (tempDir != null) {
            tempDir.delete();
        }

        if (closeFailure != null) {
            throw new AlfrescoRuntimeException(closeFailure.getMessage(), closeFailure);
        }
    }
}

From source file:org.alfresco.repo.content.transform.TextToPdfContentTransformerTest.java

License:Open Source License

/**
 * Transforms {@code text} to a PDF written to a temp file, extracts the text
 * of that PDF again and asserts that the cleaned result equals {@code checkText}.
 *
 * @param text      text to transform
 * @param encoding  name of the charset used to build the content reader
 * @param checkText text expected after the round trip
 * @throws IOException if the PDF cannot be read back
 */
private void transformTextAndCheck(String text, String encoding, String checkText) throws IOException {
    // Get a reader for the text
    ContentReader reader = buildContentReader(text, Charset.forName(encoding));

    // And a temp writer
    File out = TempFileProvider.createTempFile("AlfrescoTest_", ".pdf");
    ContentWriter writer = new FileContentWriter(out);
    writer.setMimetype("application/pdf");

    // Transform to PDF
    transformer.transform(reader, writer);

    // Read back in the PDF and check it
    StringWriter textWriter = new StringWriter();
    PDDocument doc = PDDocument.load(out);
    try {
        PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.writeText(doc, textWriter);
    } finally {
        // Close the document even when text extraction fails.
        doc.close();
    }

    String roundTrip = clean(textWriter.toString());

    assertEquals("Incorrect text in PDF when starting from text in " + encoding, checkText, roundTrip);
}

From source file:org.apache.camel.component.fop.FopComponentTest.java

License:Apache License

/**
 * Sends the XML test data through the route, then checks that the PDF written
 * to {@code target/data/result.pdf} contains text from both the XSL template
 * and the data file, and that the "foo" header arrived on the received exchange.
 */
@Test
public void createPdfUsingXmlDataAndXsltTransformation() throws Exception {
    resultEndpoint.expectedMessageCount(1);

    FileInputStream inputStream = new FileInputStream("src/test/data/xml/data.xml");
    try {
        template.sendBody(inputStream);
        resultEndpoint.assertIsSatisfied();
    } finally {
        // Release the file handle whether or not the expectation was met.
        inputStream.close();
    }

    PDDocument document = PDDocument.load("target/data/result.pdf");
    try {
        String pdfText = FopHelper.extractTextFrom(document);
        assertTrue(pdfText.contains("Project")); //from xsl template
        assertTrue(pdfText.contains("John Doe")); //from data xml
    } finally {
        // Close the loaded PDF even when an assertion fails.
        document.close();
    }

    // assert on the header "foo" being populated
    Exchange exchange = resultEndpoint.getReceivedExchanges().get(0);
    assertEquals("Header value is lost!", "bar", exchange.getIn().getHeader("foo"));
}

From source file:org.apache.camel.component.fop.FopEndpointTest.java

License:Apache License

/**
 * Loads the OUT message body of the exchange, obtained as an
 * {@code InputStream}, as a PDF document.
 *
 * @param exchange exchange whose OUT body holds the PDF
 * @return the loaded document
 * @throws IOException if the body cannot be parsed
 */
private PDDocument getDocumentFrom(Exchange exchange) throws IOException {
    return PDDocument.load(exchange.getOut().getBody(InputStream.class));
}

From source file:org.apache.camel.component.pdf.PdfAppendTest.java

License:Apache License

/**
 * Builds a one-page PDF containing "Test", sends "Append" with that document
 * as header through {@code direct:start}, and expects the resulting
 * {@code ByteArrayOutputStream} body to be a two-page PDF containing both texts.
 */
@Test
public void testAppend() throws Exception {
    final String originalText = "Test";
    final String textToAppend = "Append";
    PDDocument document = new PDDocument();
    PDPage page = new PDPage(PDPage.PAGE_SIZE_A4);
    document.addPage(page);
    PDPageContentStream contentStream = new PDPageContentStream(document, page);
    contentStream.setFont(PDType1Font.HELVETICA, 12);
    contentStream.beginText();
    contentStream.moveTextPositionByAmount(20, 400);
    contentStream.drawString(originalText);
    contentStream.endText();
    contentStream.close();

    template.sendBodyAndHeader("direct:start", textToAppend, PdfHeaderConstants.PDF_DOCUMENT_HEADER_NAME,
            document);

    resultEndpoint.setExpectedMessageCount(1);
    resultEndpoint.expectedMessagesMatches(new Predicate() {
        @Override
        public boolean matches(Exchange exchange) {
            Object body = exchange.getIn().getBody();
            assertThat(body, instanceOf(ByteArrayOutputStream.class));
            try {
                PDDocument doc = PDDocument
                        .load(new ByteArrayInputStream(((ByteArrayOutputStream) body).toByteArray()));
                try {
                    PDFTextStripper pdfTextStripper = new PDFTextStripper();
                    String text = pdfTextStripper.getText(doc);
                    assertEquals(2, doc.getNumberOfPages());
                    assertThat(text, containsString(originalText));
                    assertThat(text, containsString(textToAppend));
                } finally {
                    // Close the re-loaded document even when an assertion fails.
                    doc.close();
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return true;
        }
    });
    resultEndpoint.assertIsSatisfied();

}