Example usage for org.apache.pdfbox.pdmodel PDDocument load

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.load.

Prototype

public static PDDocument load(byte[] input) throws IOException

Source Link

Document

Parses a PDF.

Usage

From source file:com.truckzoo.test.pdf.Printing.java

License:Apache License

/**
 * Entry point./*from w w w  . j a v  a2s.c o  m*/
 */
public static void main(String args[]) throws PrinterException, IOException {
    /* if (args.length != 1)
     {
    System.err.println("usage: java " + Printing.class.getName() + " <input>");
    System.exit(1);
     }*/

    String filename = "dekho.pdf";
    PDDocument document = PDDocument.load(new File(filename));

    // choose your printing method:
    print(document);
    //printWithAttributes(document);
    //printWithDialog(document);
    //printWithDialogAndAttributes(document);
    //printWithPaper(document);
}

From source file:com.truckzoo.test.pdf.SuperimposePage.java

License:Apache License

public static void main(String[] args) throws IOException {
    /*if (args.length != 2)
    {/*from   w  ww  . j a v  a  2s .  c  o  m*/
    System.err.println("usage: " + SuperimposePage.class.getName() +
            " <source-pdf> <dest-pdf>");
    System.exit(1);
    }*/
    String sourcePath = args[0];
    String destPath = args[1];

    PDDocument sourceDoc = null;
    try {
        // load the source PDF
        sourceDoc = PDDocument.load(new File(sourcePath));
        int sourcePage = 1;

        // create a new PDF and add a blank page
        PDDocument doc = new PDDocument();
        PDPage page = new PDPage();
        doc.addPage(page);

        // write some sample text to the new page
        PDPageContentStream contents = new PDPageContentStream(doc, page);
        contents.beginText();
        contents.setFont(PDType1Font.HELVETICA_BOLD, 12);
        contents.newLineAtOffset(2, PDRectangle.LETTER.getHeight() - 12);
        contents.showText("Sample text");
        contents.endText();

        // Create a Form XObject from the source document using LayerUtility
        LayerUtility layerUtility = new LayerUtility(doc);
        PDFormXObject form = layerUtility.importPageAsForm(sourceDoc, sourcePage - 1);

        // draw the full form
        contents.drawForm(form);

        // draw a scaled form
        contents.saveGraphicsState();
        Matrix matrix = Matrix.getScaleInstance(0.5f, 0.5f);
        contents.transform(matrix);
        contents.drawForm(form);
        contents.restoreGraphicsState();

        // draw a scaled and rotated form
        contents.saveGraphicsState();
        matrix.rotate(1.8 * Math.PI); // radians
        contents.transform(matrix);
        contents.drawForm(form);
        contents.restoreGraphicsState();

        contents.close();
        doc.save(destPath);
        doc.close();
    } finally {
        if (sourceDoc != null) {
            sourceDoc.close();
        }
    }
}

From source file:com.verbox.PrintHtml.java

/**
 * Prints the file {@code pdf.pdf} on the default print service from a newly started thread.
 *
 * <p>The print job runs asynchronously, so the return value does not tell whether
 * printing succeeded. Failures are handled on the printing thread: an I/O error is
 * logged, a printer error is logged and reported in a message dialog.
 *
 * @param a when {@code true} the print thread is started; when {@code false} nothing is done
 * @return always {@code true}
 * @throws PrinterException kept in the signature for existing callers; printer errors raised
 *         on the print thread are handled there and are not propagated to the caller
 */
public static boolean Print(boolean a) throws PrinterException {
    if (a) {
        new Thread(() -> {
            // the document is closed automatically once the job has been submitted
            try (PDDocument doc = PDDocument.load("pdf.pdf")) {
                PrinterJob job = PrinterJob.getPrinterJob();
                PrintService printer = PrintServiceLookup.lookupDefaultPrintService();
                job.setPrintService(printer);
                doc.silentPrint(job);
            } catch (PrinterException ex) {
                // keep the cause in the log; the dialog text alone does not identify it
                Logger.getLogger(PrintHtml.class.getName()).log(Level.SEVERE, null, ex);
                JOptionPane.showMessageDialog(null,
                        "? ? ? ,  ?   .");
            } catch (IOException ex) {
                Logger.getLogger(PrintHtml.class.getName()).log(Level.SEVERE, null, ex);
            }
        }).start();
    }
    return true;
}

From source file:com.verbox.PrintHtml.java

/**
 * Renders the first page of {@code pdf.pdf} at the screen resolution and writes it
 * to {@code name_img.png} as a PNG preview image.
 *
 * @throws IOException if the PDF cannot be loaded or the image cannot be written
 */
public static void PreImgPrint() throws IOException {

    int resolution = Toolkit.getDefaultToolkit().getScreenResolution();

    // base name (without extension) of the preview image file
    String pdfPath = "name_img";

    // load pdf document; try-with-resources closes it even when rendering fails
    try (PDDocument document = PDDocument.load("pdf.pdf")) {

        List<PDPage> pages = document.getDocumentCatalog().getAllPages();

        //Read first page
        PDPage page = pages.get(0);

        //Convert To Image
        BufferedImage previewImage = page.convertToImage(BufferedImage.TYPE_INT_RGB, resolution);

        //Save to file
        ImageIO.write(previewImage, "png", new File(pdfPath + ".png"));
    }

}

From source file:com.virtusa.isq.vtaf.runtime.SeleniumTestBase.java

License:Apache License

/**
 * Pdf verify.
 *
 * <p>Only the {@code RELATIVE} verify type is handled; for any other type the method
 * does nothing. In relative mode the text of the given page range is extracted and
 * the check passes when a line equal to the key is directly followed by a line equal
 * to the value, or when the collected text contains the key directly followed
 * (ignoring surrounding whitespace) by the value.
 *
 * @param filePath
 *            the file path
 * @param pageNumberRange
 *            the page number range, written as {@code start-end}
 * @param verifyType
 *            the verify type
 * @param inputString
 *            the input string, written as {@code key,value}
 * @param stopOnFaliure
 *            the stop on faliure
 * @param customError
 *            optional custom error data; when it is non-null and its first element is
 *            neither {@code "null"} nor empty, the output of
 *            {@code generateCustomError(customError)} is prepended to failure messages
 */
private void pdfVerify(final String filePath, final String pageNumberRange, final String verifyType,
        final String inputString, final boolean stopOnFaliure, final Object[] customError) {

    String pageText;

    if ("RELATIVE".equalsIgnoreCase(verifyType)) {
        String[] inputStringSeperate = inputString.split("\\,");
        String key = inputStringSeperate[0];
        String value = inputStringSeperate[1];
        String[] pageSeperate = pageNumberRange.split("\\-");
        int startPage = Integer.parseInt(pageSeperate[0]);
        int endPage = Integer.parseInt(pageSeperate[1]);
        try {
            File file = new File(filePath);

            // The document is only needed for text extraction; close it right afterwards
            // so the file handle is not leaked.
            try (PDDocument pdfDoc = PDDocument.load(file)) {
                PDFTextStripper reader = new PDFTextStripper();
                reader.setStartPage(startPage);
                reader.setEndPage(endPage);
                reader.setSuppressDuplicateOverlappingText(false);
                pageText = reader.getText(pdfDoc);
            }

            BufferedReader br = new BufferedReader(new StringReader(pageText));

            String tmp = "";
            boolean isPresent = false;
            StringBuilder entireString = new StringBuilder();
            while ((tmp = br.readLine()) != null) {

                if (tmp.equals(key)) {
                    // the value is expected on the line directly after the key
                    String val = br.readLine();
                    if (val != null && val.equals(value)) {
                        isPresent = true;
                        break;
                    } else {
                        // neither the key line nor the consumed line is collected
                        continue;
                    }
                }
                entireString.append(tmp);
            }
            // second chance: key and value on the same line or split across collected lines
            if (entireString.toString().contains(key)) {

                int index = entireString.indexOf(key);
                String subEntire = entireString.toString().substring(index);
                String newString = subEntire.replace(key, "");
                if (newString.trim().startsWith(value)) {
                    isPresent = true;
                }

            }
            if (isPresent) {
                reportresult(true, "CHECK DOCUMENT :", "PASSED",
                        "CheckDocument command: Verified text as expected. Expected Value : " + inputString);
            } else {

                if (customError != null && !(customError[0].equals("null") || customError[0].equals(""))) {

                    reportresult(true, "CHECK DOCUMENT :", "FAILED", " Custom Error :"
                            + generateCustomError(customError)
                            + " System generated Error : CheckDocument command: Verified text as not expected. Expected Value : "
                            + inputString);
                    checkTrue(false, stopOnFaliure, " Custom Error :" + generateCustomError(customError)
                            + " System generated Error : CheckDocument command: Verified text as not expected. Expected Value : "
                            + inputString);

                } else {

                    reportresult(true, "CHECK DOCUMENT :", "FAILED",
                            "CheckDocument command: Verified text as not expected. Expected Value : "
                                    + inputString);
                    checkTrue(false, stopOnFaliure,
                            "CheckDocument command: Verified text as not expected : " + inputString);
                }

            }
        } catch (Exception e) {
            // NOTE(review): if checkTrue above signals failure with an Exception subclass,
            // it lands here and is reported a second time - confirm against checkTrue.

            if (customError != null && !(customError[0].equals("null") || customError[0].equals(""))) {

                reportresult(true, "CHECK DOCUMENT :", "FAILED",
                        " Custom Error :" + generateCustomError(customError)
                                + " System generated Error : CheckDocument command: Text Verification failed: "
                                + inputString);
                checkTrue(false, stopOnFaliure,
                        " Custom Error :" + generateCustomError(customError)
                                + " System generated Error : CheckDocument command: Text Verification failed: "
                                + inputString + e.getMessage());

            } else {

                reportresult(true, "CHECK DOCUMENT :", "FAILED",
                        "CheckDocument command: Verified text as not expected. Expected Value : "
                                + inputString);
                checkTrue(false, stopOnFaliure,
                        "CheckDocument command: Verified text as not expected : " + inputString);
            }
        }
    }

}

From source file:com.vns.pdf.impl.PdfDocument.java

License:Apache License

/**
 * Opens the given PDF and prepares its text-area data.
 *
 * <p>A file name for the text-area XML is derived from the PDF's name, the hash of its
 * absolute path, its size and its last-modified time. If that file does not exist yet,
 * the document is run through the text stripper, bookmarks/annotations are parsed and
 * the file is created; otherwise the existing file is loaded.
 *
 * @param pdfFileName path of the PDF file to open
 * @throws IOException if the file attributes or the PDF cannot be read
 */
private PdfDocument(String pdfFileName) throws IOException {
    this.pdfFileName = pdfFileName;
    setWorkingDir();
    Path filePath = Paths.get(pdfFileName);
    // Only size and modification time are needed, so read the basic attributes: a POSIX
    // attribute view is unavailable (null) on non-POSIX file systems such as Windows.
    java.nio.file.attribute.BasicFileAttributes attrs = Files.readAttributes(filePath,
            java.nio.file.attribute.BasicFileAttributes.class);
    String textAreaFileName = filePath.getFileName().toString() + "_" + filePath.toAbsolutePath().hashCode()
            + "_" + attrs.size() + "_" + attrs.lastModifiedTime().toString().replace(":", "_") + ".xml";
    textAreaFilePath = Paths.get(workingDir.toAbsolutePath().toString(), textAreaFileName);
    pdfTextStripper = new CustomPDFTextStripper();
    document = PDDocument.load(new File(pdfFileName));
    pdfRenderer = new PDFRenderer(document);

    if (Files.notExists(textAreaFilePath, LinkOption.NOFOLLOW_LINKS)) {
        pdfTextStripper.setSortByPosition(false);
        // NOTE(review): PDFTextStripper page numbers are documented as 1-based; 0 is kept
        // from the original code - confirm it is intended.
        pdfTextStripper.setStartPage(0);
        pdfTextStripper.setEndPage(document.getNumberOfPages());

        // one Page entry per PDF page, sized from the page's media box
        this.doc = new Doc(new ArrayList<>(), new ArrayList<>());
        for (int i = 0; i < document.getNumberOfPages(); i++) {
            PDPage pdPage = document.getPage(i);
            PDRectangle box = pdPage.getMediaBox();
            this.doc.getPages().add(new Page(new ArrayList<>(), new ArrayList<>(), (int) box.getWidth(),
                    (int) box.getHeight()));
        }

        // The written text itself is discarded; presumably the custom stripper collects
        // what it needs while processing - TODO confirm against CustomPDFTextStripper.
        try (Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream())) {
            pdfTextStripper.writeText(document, dummy);
        } catch (Exception ex) {
            LOGGER.error(ex.getMessage(), ex);
        }
        parseBookmarksAnnotation();
        createTextAreaFile();
        //document.save(pdfFileName + ".pdf");
    } else {
        loadTextAreaFile();
    }
}

From source file:com.webfront.app.utils.PDFImporter.java

/**
 * Imports ledger entries from the PDF named by {@code fileName}.
 *
 * <p>The PDF text is extracted, written to {@code pdfOut.txt} in the configured temp
 * directory and read back line by line into {@code buffer}. The XML configuration is then
 * used to mark the start line of each section, every marked section is processed, and
 * finally the collected entries are sorted and converted into {@code Ledger} items.
 *
 * @param reader not used by this implementation
 * @throws IOException if the PDF or the temporary text file cannot be read or written
 * @throws ParseException declared by the signature; presumably raised while processing
 *         sections or converting dates - TODO confirm
 */
@Override
public void doImport(BufferedReader reader) throws IOException, ParseException {
    PDFTextStripper pdfStripper = new PDFTextStripper();
    // both the input stream and the document are closed automatically
    try (BufferedInputStream inStream = new BufferedInputStream(new FileInputStream(fileName));
            PDDocument document = PDDocument.load(inStream)) {
        txtOutput = pdfStripper.getText(document);
        String tmpTextPath = cfg.getTmpDir() + cfg.getFileSep() + "pdfOut.txt";
        // finish (and thereby flush) the write before the file is read back
        try (FileWriter writer = new FileWriter(tmpTextPath)) {
            writer.write(txtOutput);
        }
        txtReader = new BufferedReader(new FileReader(tmpTextPath));
        try {
            String text = "";
            // the terminating null is stored as well; maxLines below relies on that size
            while (text != null) {
                text = txtReader.readLine();
                buffer.put(currentLine++, text);
            }
        } finally {
            // the reader is exhausted at this point; release the file handle
            txtReader.close();
        }
        getConfig();
        currentLine = 0;
        Element root = xmlDoc.getRootElement();
        int maxLines = buffer.size() - 3;
        int markedLine = 0;
        // Scan the output and mark the start of each section
        for (Element el : root.getChildren()) {
            for (Element section : el.getChildren()) {
                String sectionName = section.getAttributeValue("content");
                Element startElement = section.getChild("start");
                Element endElement = section.getChild("end");
                if (startElement != null) {
                    boolean endHasBounds = true;
                    // a section without an <end> child is treated as bounded
                    if (endElement != null && endElement.getAttribute("bounded") != null) {
                        String bounds = endElement.getAttributeValue("bounded");
                        if (bounds.equals("false")) {
                            endHasBounds = false;
                        }
                    }
                    Pattern linePattern = Pattern.compile(startElement.getText());
                    String text = "";
                    boolean elementFound = false;
                    while (currentLine < maxLines) {
                        text = buffer.get(currentLine++);
                        if (linePattern.matcher(text).matches()) {
                            markedLine = currentLine - 1;
                            markers.put(sectionName, markedLine);
                            elementFound = true;
                            if (!endHasBounds) {
                                // unbounded end: the matched line is scanned again for the next section
                                currentLine--;
                            }
                            break;
                        }
                    }
                    if (!elementFound) {
                        // rewind to the last marked line so later sections are still searched
                        currentLine = markedLine;
                    }
                }
            }
        }

        sectionMarks = new TreeSet(markers.values());
        currentLine = 0;
        for (Element element : root.getChildren()) {
            // The parsed value is not used here, but a malformed "lines" attribute
            // still aborts the import through DataConversionException.
            int lines = 0;
            if (element.getAttribute("lines") != null) {
                lines = element.getAttribute("lines").getIntValue();
            }
            for (Element section : element.getChildren()) {
                String contentDesc;
                contentDesc = (section.getAttribute("content") == null ? ""
                        : section.getAttributeValue("content"));
                if (markers.containsKey(contentDesc)) {
                    currentLine = markers.get(contentDesc);
                    processSection(section);
                }
            }
        }
    } catch (DataConversionException ex) {
        Logger.getLogger(PDFImporter.class.getName()).log(Level.SEVERE, null, ex);
    } catch (ElementNotFoundException ex) {
        Logger.getLogger(PDFImporter.class.getName()).log(Level.SEVERE, null, ex);
    }
    entries.sort(LedgerEntry.LedgerComparator);
    for (LedgerEntry item : entries) {
        java.util.Date date = new java.util.Date(DateConvertor.toLong(item.getDate(), "MM/dd/yyyy"));
        String amountString = item.getAmount();
        boolean isCredit = true;
        if (item.getAmount().startsWith("-")) {
            isCredit = false;
            String amt = item.getAmount().replaceFirst("-", "");
            item.setAmount(amt);
        }
        // NOTE(review): amountString was captured before the sign was stripped, so a debit
        // parses to a negative amount and "lastBalance -= amount" raises the balance.
        // Left unchanged because the intended sign convention is not visible here - confirm.
        float amount = Float.parseFloat(amountString);
        if (isCredit) {
            lastBalance += amount;
            totalDeposits += amount;
        } else {
            lastBalance -= amount;
            totalWithdrawals += amount;
        }
        Ledger ledger = new Ledger(null, date, amount, lastBalance, accountId);
        if (item.getDescription().length() > 120) {
            item.setDescription(item.getDescription().substring(0, 119));
        }
        if (item.getCheckNumber() != null && !item.getCheckNumber().isEmpty()) {
            ledger.setCheckNum(item.getCheckNumber());
            totalChecks -= amount;
        }
        ledger.setTransDesc(item.getDescription());
        getItemList().add(ledger);
    }
}

From source file:com.wintindustries.pdffilter.pdfcore.PDFTester.java

/**
 * Prints the metadata and the text of the first two pages of a PDF to standard output.
 *
 * <p>Any failure is logged instead of being thrown.
 *
 * @param path path of the PDF file to read
 */
static public void printOutPDFTExt(String path) {

    // Load the file the caller asked for (the parameter used to be ignored in favour of
    // a hard-coded local path) and close it when done.
    try (PDDocument document = PDDocument.load(path)) {
        PDFTextStripper stripper = new PDFTextStripper();

        stripper.setStartPage(1);
        stripper.setEndPage(2);
        String parsedText = stripper.getText(document);
        printMetadata(document);

        System.out.println("PDF TEXT: \n\n\n\n\n" + parsedText);

    } catch (Exception ex) {
        Logger.getLogger(PDFTester.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:com.yiyihealth.tools.test.DrawPrintTextLocations.java

License:Apache License

/**
 * This will print the documents data./*from ww  w .  ja v a  2  s.  com*/
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        usage();
    } else {
        PDDocument document = null;
        try {
            document = PDDocument.load(new File(args[0]));

            DrawPrintTextLocations stripper = new DrawPrintTextLocations(document, args[0]);
            stripper.setSortByPosition(true);

            for (int page = 0; page < document.getNumberOfPages(); ++page) {
                stripper.stripPage(page);
            }
        } finally {
            if (document != null) {
                document.close();
            }
        }
    }
}

From source file:com.zilbo.flamingSailor.TE.PDFParser.java

License:Apache License

/**
 * get a TextPage out of the PDF, ignoring characters smaller than minHeight.
 *
 * <p>After the text has been extracted the pages are post-processed: header/footer
 * boilerplate is removed, per-document layout and font statistics are accumulated, and
 * each page builds its higher order components from those statistics.
 *
 * @param pdfFile   the File to extract it out of
 * @param minHeight minimum height to ignore
 * @return the extracted pages; an empty list when the file could not be read or
 *         produced no pages
 */

public List<TextPage> getTextPages(File pdfFile, float minHeight) {
    fileName = pdfFile.getName();
    outString = new StringWriter();
    this.minHeight = minHeight;
    this.textPageList = new ArrayList<>();
    Map<String, Map<Integer, Long>> fontCounts = new HashMap<>();
    document = null;
    try {
        document = PDDocument.load(pdfFile);
        catalog = document.getDocumentCatalog();
        allpages = catalog.getAllPages();

        this.writeText(document, outString);
        outString.close();
        outString = null;
        // document.close();
    } catch (IOException e) {
        logger.error("I/O Error:" + pdfFile.getName(), e);
    } finally {
        if (document != null) {
            try {
                document.close();
                document = null;
            } catch (IOException e) {
                logger.error("I/O error closing file:" + pdfFile.getName(), e);
            }
        }
    }

    // Nothing was extracted (for example after the I/O error logged above): the
    // statistics below would divide by textPageList.size() == 0, so stop here.
    if (textPageList.isEmpty()) {
        return textPageList;
    }

    // the page is currently a set of lines with text pieces.
    // next steps
    // 1. remove header/footer boilerplate
    // 2. get font stats
    // 3. construct higher order components
    //
    TextPage.removeBoilerplate(textPageList, TextPage.LEVENSHTEIN_DISTANCE);
    long histogram[] = null;
    for (TextPage page : textPageList) {
        double avgLeft = page.getAvgLeft();
        double avgRight = page.getAvgRight();
        double avgWidth = page.getAvgWidth();
        long lineCount = page.getLineCount();
        Double charDensity = page.getCharDensity();
        histogram = Component.mergeHistogram(page.getHistogram(), histogram);
        if (lineCount > 0) {
            // weight each page's averages by its line count; normalised after the loop
            docAvgLeft += avgLeft * lineCount;
            docAvgWidth += avgWidth * lineCount;
            docAvgRight += avgRight * lineCount;
            docCharDensity += charDensity * lineCount;
            docLineCount += lineCount;
        }
        // fold this page's per-font tallies into the document-wide tallies
        Map<String, Map<Integer, Long>> pageFontCounts = page.getFontCounts();
        for (Map.Entry<String, Map<Integer, Long>> e : pageFontCounts.entrySet()) {
            Map<Integer, Long> fontTally = fontCounts.get(e.getKey());
            if (fontTally == null) {
                fontTally = new HashMap<>();

            }
            for (Map.Entry<Integer, Long> pageFontTally : e.getValue().entrySet()) {
                Long tally = fontTally.get(pageFontTally.getKey());
                if (tally == null) {
                    fontTally.put(pageFontTally.getKey(), pageFontTally.getValue());
                } else {
                    fontTally.put(pageFontTally.getKey(), tally + pageFontTally.getValue());
                }
            }
            fontCounts.put(e.getKey(), fontTally);
        }
    }

    // NOTE(review): the doc* accumulators are fields and are not reset in this method;
    // confirm whether repeated calls on the same instance are expected.
    docAvgLeft /= docLineCount;
    docAvgRight /= docLineCount;
    docAvgWidth /= docLineCount;
    docCharDensity /= docLineCount;
    linesPerPage = docLineCount / textPageList.size();
    normalizeFontCounts(fontCounts);
    normalizedHistogram = Component.getNormalizedHistogram(histogram);
    logger.info(Component.normHistoGramToString(normalizedHistogram) + String.format(
            " H:%5.1f W:%6.1f D:%4.2f P:%4.2f", (double) highestFreqSize, docAvgWidth, docCharDensity, 1.0));
    for (TextPage page : textPageList) {
        page.constructPageComponents(highestFreqSize, this.minFontSize, this.maxFontSize, normalizedFontCounts,
                normalizedFonts, normalizedSizes, docAvgLeft, docAvgRight, docAvgWidth, docCharDensity,
                linesPerPage, normalizedHistogram);
    }

    return textPageList;
}