Example usage for org.apache.pdfbox.pdmodel PDDocument load

List of usage examples for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.load.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:com.fangxin365.core.utils.PDFMerger.java

License:Apache License

/**
 * Merge the list of source documents, saving the result in the destination
 * file./*w ww. j a  va 2s .  c o m*/
 * 
 * @throws IOException
 *             If there is an error saving the document.
 * @throws COSVisitorException
 *             If an error occurs while saving the destination file.
 */
public void mergeDocuments() throws IOException, COSVisitorException {
    PDDocument destination = null;
    InputStream sourceFile;
    PDDocument source;
    if (sources != null && sources.size() > 0) {
        java.util.Vector<PDDocument> tobeclosed = new java.util.Vector<PDDocument>();

        try {
            Iterator<InputStream> sit = sources.iterator();
            sourceFile = sit.next();
            destination = PDDocument.load(sourceFile);

            while (sit.hasNext()) {
                sourceFile = sit.next();
                source = PDDocument.load(sourceFile);
                tobeclosed.add(source);
                appendDocument(destination, source);
            }
            if (destinationStream == null) {
                destination.save(destinationFileName);
            } else {
                destination.save(destinationStream);
            }
        } finally {
            if (destination != null) {
                destination.close();
            }
            for (PDDocument doc : tobeclosed) {
                doc.close();
            }
        }
    }
}

From source file:com.fcore.base.fileSystem.utils.FileUtil.java

/**
 * PDF to Image(png)/*from  w ww. ja v a 2s . c  o  m*/
 * @param pdfPath
 * @param imagePath
 */
public static void pdf2png(String pdfPath, String imagePath) {
    long old = System.currentTimeMillis();
    // pdf ??
    File file = new File(pdfPath);
    try {
        PDDocument doc = PDDocument.load(file);
        PDFRenderer renderer = new PDFRenderer(doc);
        int pageCount = doc.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            BufferedImage image = renderer.renderImageWithDPI(i, 250); // Windows
            //BufferedImage srcImage = resize(image, 240, 240);// 
            ImageIO.write(image, "PNG", new File(imagePath + i + ".png"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    long now = System.currentTimeMillis();
    System.out.println("" + ((now - old) / 1000.0) + "\n\n" + "?:" + imagePath);
}

From source file:com.fileOperations.StampPDF.java

/**
 * This stamps docketed files./*from w w w  .  ja  v a2s.  c o m*/
 *
 * @param file String (full file path)
 * @param docketTime Timestamp
 * @param dept
 */
public static void stampDocument(String file, Timestamp docketTime, String dept) {
    // the document
    PDDocument doc = null;
    try {
        PDFont stampFont = PDType1Font.TIMES_ROMAN;
        float stampFontSize = 14;
        String title = PDFBoxTools.HeaderTimeStamp(docketTime) + " " + dept;
        float titleWidth = stampFont.getStringWidth(title) / 1000 * stampFontSize;
        float titleHeight = stampFont.getFontDescriptor().getFontBoundingBox().getHeight() / 1000
                * stampFontSize;
        int marginTop = 20;

        doc = PDDocument.load(new File(file));

        if (!doc.isEncrypted()) {
            for (int i = 0; i < doc.getPages().getCount(); i++) {
                PDPageContentStream contentStream = null;

                PDPage page = (PDPage) doc.getPages().get(i);

                contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true, true);
                page.getResources().getFontNames();

                contentStream.beginText();
                contentStream.setFont(stampFont, stampFontSize);
                contentStream.setNonStrokingColor(Color.RED);
                contentStream.newLineAtOffset((page.getMediaBox().getWidth() - titleWidth) / 2,
                        page.getMediaBox().getHeight() - marginTop - titleHeight);
                contentStream.showText(title);
                contentStream.endText();

                contentStream.close();
            }
            doc.save(file);
        }
    } catch (IOException ex) {
        ExceptionHandler.Handle(ex);
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException ex) {
                ExceptionHandler.Handle(ex);
            }
        }
    }
}

From source file:com.fngry.monk.biz.demo.pdf.pdfbox.PrintImageLocations.java

License:Apache License

/**
 * Prints the image locations found on page 7 of a fixed PDF file.
 *
 * <p>The input path is hard-coded and the command line arguments are
 * currently ignored.</p>
 *
 * @param args The command line arguments (unused).
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    final int targetPage = 7;
    try (PDDocument pdf = PDDocument
            .load(new File("/Users/gaorongyu/Downloads/092517323115_cleaned.pdf"))) {
        PrintImageLocations locationPrinter = new PrintImageLocations();
        // Page numbers are 1-based; every page is visited but only the
        // target page is processed.
        int currentPage = 1;
        for (PDPage page : pdf.getPages()) {
            if (currentPage == targetPage) {
                System.out.println("==============Processing page: " + currentPage);
                locationPrinter.processPage(page);
            }
            currentPage++;
        }
    }
}

From source file:com.formkiq.core.service.conversion.ConversionServiceTest.java

License:Apache License

/**
 * sample-form2.pdf to PNG./*w ww .  j  a  v  a 2s  .c om*/
 * @throws IOException IOException
 */
@Test
public void testPdfToPngFormatConverter01() throws IOException {
    // given
    byte[] data = Resources.getResourceAsBytes("/sample-form2.pdf");
    PDDocument doc = PDDocument.load(data);

    // when
    ConversionResult result = this.cs.convert(doc, PDF, PNG);

    // then
    doc.close();

    final int datalength = 128000;
    final int dataheight = 1584;
    final int datawidth = 612;
    final int x = 211;
    final int y = 992;

    assertEquals(1, result.getFields().size());
    assertTrue(result.getData().length > datalength);
    assertEquals(dataheight, (int) result.getDataheight());
    assertEquals(datawidth, (int) result.getDatawidth());

    assertEquals(x, (int) result.getFields().get(0).getX());
    assertEquals(y, (int) result.getFields().get(0).getY());
}

From source file:com.formkiq.core.service.entry.WorkflowEntryFlowEventProcessor.java

License:Apache License

/**
 * Renders the workflow's single PDF to PNG and stores the conversion result
 * in the flow under {@code PDFCONV}, unless a result is already stored there.
 * @param flow {@link WebFlow}
 * @return {@link String} always empty; returned just to prevent a 302.
 * @throws IOException IOException
 */
@Transactional
public String eventIdrenderimage(final WebFlow flow) throws IOException {

    ConversionResult existing = flow.getParameter(PDFCONV);
    if (existing != null) {
        // Already converted for this flow; nothing to do.
        return "";
    }

    ArchiveDTO archive = flow.getData();
    this.folderService.createWorkflowOutput(archive);

    // Exactly one PDF is required.
    if (archive.getPDF().isEmpty()) {
        throw new PreconditionFailedException("No PDF Found");
    }
    if (archive.getPDF().size() > 1) {
        throw new PreconditionFailedException(archive.getPDF().size() + " PDF(s) Found");
    }

    byte[] pdfBytes = archive.getPDF().values().iterator().next();
    PDDocument pdf = PDDocument.load(pdfBytes);
    try {
        ConversionResult converted = new ConversionService().convert(pdf, PDF, PNG);
        flow.setParameter(PDFCONV, converted);
    } finally {
        pdf.close();
    }

    return "";
}

From source file:com.formkiq.core.service.generator.docx4j.WorkflowOutputWordDocxTest.java

License:Apache License

/**
 * Creates PDF output from DOCX and verifies that only the PDF is stored in
 * the archive and that its converted HTML text contains the expected values.
 * @throws Exception Exception
 */
@Test
public void testCreateOutput02() throws Exception {
    // given
    WorkflowOutputDocumentType outDocType = PDF;
    ArchiveDTO archive = new ArchiveDTO();

    // when
    String docname = createOutputDoc(archive, outDocType);

    // then
    assertNull(archive.getObject(docname + ".docx"));
    assertNotNull(archive.getObject(docname + ".pdf"));

    byte[] data = archive.getObject(docname + ".pdf");

    String header = Strings.toString(data);
    assertTrue(header.startsWith("%PDF-1.4"));

    // try-with-resources closes the document even when an assertion fails.
    try (PDDocument doc = PDDocument.load(data)) {
        String text = Strings.toString(new ConversionService().convert(doc, PDF, HTML).getData());

        assertTrue(text.contains("Jan"));
        assertTrue(text.contains("2017"));
        assertTrue(text.contains("12"));
        // The upper-case placeholders must not appear in the output.
        assertFalse(text.contains("DAY"));
        assertFalse(text.contains("MONTH"));
        assertFalse(text.contains("YEAR"));
        assertTrue(text.contains("Johnny Smith"));
    }
}

From source file:com.formkiq.core.service.generator.pdfbox.PdfEditorServiceImpl.java

License:Apache License

/**
 * Parses the given bytes as a PDF document.
 * @param data byte[] holding the PDF content
 * @return {@link PDDocument} the loaded document; the caller is expected to close it
 * @throws IOException IOException if the bytes cannot be read or parsed
 */
public PDDocument loadPDF(final byte[] data) throws IOException {
    final ByteArrayInputStream pdfBytes = new ByteArrayInputStream(data);
    return PDDocument.load(pdfBytes);
}

From source file:com.formkiq.web.WorkflowAddControllerIntegrationTest.java

License:Apache License

/**
 * testCreateWorkflow11().
 * Fills out, generates and signs a fillable PDF through the browser, then
 * verifies the files stored in the folder and the contents of the final PDF.
 * @throws Exception Exception
 */
@Test
public void testCreateWorkflow11() throws Exception {
    // given
    String pdfname = "sample-form2.pdf";
    byte[] data = Resources.getResourceAsBytes("/" + pdfname);
    ArchiveDTO archive = buildArchiveDTO(pdfname);
    this.pdfEditorService.generate(archive, pdfname, data);

    String token = login();
    String folder = createFolder(token, getDefaultEmail());
    addFileToFolder(token, folder, archive);

    // when
    login(getDefaultEmail());
    getDriver().navigate().to(getDefaultHostAndPort() + "/user/dashboard");
    waitForJSandJQueryToLoad();

    assertEquals("FormKiQ Server - Dashboard", getTitle());

    findElementBy(By.className("add_0")).click();

    // then (verify on correct page)
    assertEquals(getDefaultHostAndPort() + "/flow/workflow?execution=s1e1", getDriver().getCurrentUrl());
    assertEquals(SAMPLE_FORM_2_HTML_TITLE, getTitle());

    fillSampleForm2();

    // when (submit)
    submitByName("_eventId_next", "Next");

    // then verify summary: field 55 still shows a button and no image yet
    assertEquals(getDefaultHostAndPort() + "/flow/workflow?execution=s1e2", getDriver().getCurrentUrl());
    assertEquals("FormKiQ Server - Signature", getTitle());
    assertEquals(1, findElements(getBy("button", "data-fieldid", "55")).size());
    assertEquals(0, getDriver().findElements(getBy("img", "data-fieldid", "55")).size());

    // when (go back)
    submitByName("_eventId_prev", "Previous");

    // then
    assertEquals(getDefaultHostAndPort() + "/flow/workflow?execution=s1e1", getDriver().getCurrentUrl());
    assertEquals(SAMPLE_FORM_2_HTML_TITLE, getTitle());

    // when (type additional text into the field named "1" and continue)
    findElementBy(By.name("1")).sendKeys("Smith123");
    submitByName("_eventId_next", "Next");

    // then
    assertEquals(getDefaultHostAndPort() + "/flow/workflow?execution=s1e2", getDriver().getCurrentUrl());
    assertEquals("FormKiQ Server - Signature", getTitle());

    // when (signature)
    click(By.className("button-sig"));

    // The values 555 and 999 are asserted further down as the stored
    // latitude and longitude.
    JavascriptExecutor jsExecutor = (JavascriptExecutor) getDriver();
    jsExecutor.executeScript("signaturemetadata('555','999');");

    // then
    getWait().until(ExpectedConditions.visibilityOfElementLocated(By.id("form-modal")));

    // when (close the modal without updating)
    click(By.className("form-modal-close-button"));

    // then
    getWait().until(ExpectedConditions.invisibilityOfElementLocated(By.id("form-modal")));

    // when (signature)
    click(By.className("button-sig"));

    // then
    fillSignature("55");

    // when
    click(By.className("form-modal-update-button"));

    // then: field 55 now shows an image instead of a button
    getWait().until(ExpectedConditions.invisibilityOfElementLocated(By.id("form-modal")));
    assertEquals(0, getDriver().findElements(getBy("button", "data-fieldid", "55")).size());
    assertEquals(1, findElements(getBy("img", "data-fieldid", "55")).size());

    // when (final submit, using twice the usual TIMEOUT)
    submitByName("_eventId_next", " Submit", TIMEOUT * 2);

    // then complete page
    assertEquals(getDefaultHostAndPort() + "/flow/workflow?execution=s1e3", getDriver().getCurrentUrl());
    assertEquals("FormKiQ Server - sample-form2.pdf Complete", getTitle());

    Workflow workflow = archive.getWorkflow();

    // From here on, use the workflow and the file map returned by the folder check.
    Pair<Workflow, Map<String, byte[]>> pwf = verifyFolderFileList(token, folder, workflow, "ACTIVE",
            "sample-form2.pdf");
    workflow = pwf.getLeft();
    Map<String, byte[]> map = pwf.getRight();

    assertEquals(getDefaultHostAndPort() + "/api/folders/files/" + folder + "/" + workflow.getUUID() + ".pdf",
            findElementBy(By.id("pdflink")).getAttribute("href"));

    // Exactly one .pdf entry, named after SAMPLE_FORM2, and one .signature entry.
    assertEquals(SAMPLE_FORM2 + ".pdf",
            map.keySet().stream().filter(s -> s.endsWith(".pdf")).collect(Collectors.joining(", ")));

    assertEquals(1, map.keySet().stream().filter(s -> s.endsWith(".pdf")).count());

    assertEquals(1, map.keySet().stream().filter(s -> s.endsWith(".signature")).count());

    // Form JSON of the workflow step at index 1.
    FormJSON f1 = this.jsonService.readValue(map.get(workflow.getSteps().get(1) + ".form"), FormJSON.class);

    // The first field's value must be a key of the form's asset data.
    assertTrue(f1.getAssetData().containsKey(f1.getSections().get(0).getFields().get(0).getValue()));
    assertEquals("555", findValueByKey(f1, "latitude").get().getValue());
    assertEquals("999", findValueByKey(f1, "longitude").get().getValue());
    // IPv6 loopback address, written out in full.
    assertEquals("0:0:0:0:0:0:0:1", findValueByKey(f1, "ipaddress").get().getValue());
    assertEquals("", findValueByKey(f1, "xforwardedfor").get().getValue());

    assertNotNull(this.jsonService.stringToDate(findValueByKey(f1, "inserteddate").get().getValue()));

    byte[] pdf = map.get(SAMPLE_FORM2 + ".pdf");

    // Inspect the generated PDF; the finally block closes it even when an
    // assertion fails.
    PDDocument document = PDDocument.load(pdf);
    try {
        PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
        // "Smith123" was typed after going back; presumably it was appended to
        // a "Smith" entered by fillSampleForm2 -- confirm against that helper.
        assertEquals("SmithSmith123", acroForm.getField("lastName").getValueAsString());
        assertEquals("John", acroForm.getField("firstName").getValueAsString());
        assertEquals(1, document.getSignatureDictionaries().size());
    } finally {
        document.close();
    }

    // TODO verify audit
}

From source file:com.giaybac.traprange.extractor.PDFTableExtractor.java

License:MIT License

/**
 * Loads the PDF from the configured input stream and builds one table per
 * selected page.
 *
 * <p>A page is selected when it is not in {@code exceptedPages} and either
 * {@code extractedPages} is empty or it contains the page. Column ranges are
 * computed once from the texts of all selected pages and shared by every
 * table.</p>
 *
 * @return the tables built for the selected pages
 * @throws RuntimeException wrapping the {@link IOException} if the PDF cannot be parsed
 */
public List<Table> extract() {
    List<Table> tables = new ArrayList<>();
    Multimap<Integer, Range<Integer>> lineRangesByPage = LinkedListMultimap.create();
    Multimap<Integer, TextPosition> textsByPage = LinkedListMultimap.create();
    try {
        this.document = PDDocument.load(inputStream);
        for (int pageId = 0; pageId < document.getNumberOfPages(); pageId++) {
            // Skip explicitly excluded pages.
            if (exceptedPages.contains(pageId)) {
                continue;
            }
            // When an explicit page selection exists, skip pages outside it.
            if (!extractedPages.isEmpty() && !extractedPages.contains(pageId)) {
                continue;
            }

            PDPage page = (PDPage) document.getDocumentCatalog().getAllPages().get(pageId);
            // Per the original author's note, texts come back sorted by getY() ascending.
            List<TextPosition> pageTexts = extractTextPositions(page);
            // Line ranges of this page.
            List<Range<Integer>> pageLineRanges = getLineRanges(pageId, pageTexts);
            // Texts selected by those line ranges; used later for the column ranges.
            List<TextPosition> textsInLines = getTextsByLineRanges(pageLineRanges, pageTexts);

            lineRangesByPage.putAll(pageId, pageLineRanges);
            textsByPage.putAll(pageId, textsInLines);
        }

        // Column ranges are calculated across all collected pages.
        List<Range<Integer>> columnRanges = getColumnRanges(textsByPage.values());
        for (int pageId : textsByPage.keySet()) {
            Table pageTable = buildTable(pageId, (List) textsByPage.get(pageId),
                    (List) lineRangesByPage.get(pageId), columnRanges);
            tables.add(pageTable);
            logger.debug("Found " + pageTable.getRows().size() + " row(s) and " + columnRanges.size()
                    + " column(s) of a table in page " + pageId);
        }
    } catch (IOException ex) {
        throw new RuntimeException("Parse pdf file fail", ex);
    }
    return tables;
}