Example usage for com.itextpdf.text Image getDpiX

List of usage examples for com.itextpdf.text Image getDpiX

Introduction

On this page you can find example usages of com.itextpdf.text Image getDpiX.

Prototype

public int getDpiX() 

Source Link

Document

Gets the dots-per-inch in the X direction.

Usage

From source file:com.ephesoft.dcma.imagemagick.MultiPageExecutor.java

License:Open Source License

/**
 * The <code>addImageToPdf</code> method is used to add image to pdf and make it searchable by adding image text in invisible mode
 * w.r.t parameter 'isPdfSearchable' passed.
 * /*from  w  ww  . j  a va2 s .c  o m*/
 * @param pdfWriter {@link PdfWriter} writer of pdf in which image has to be added
 * @param htmlUrl {@link HocrPage} corresponding html file for fetching text and coordinates
 * @param imageUrl {@link String} url of image to be added in pdf
 * @param isPdfSearchable true for searchable pdf else otherwise
 * @param widthOfLine
 */
private void addImageToPdf(PdfWriter pdfWriter, HocrPage hocrPage, String imageUrl, boolean isPdfSearchable,
        final int widthOfLine) {
    if (null != pdfWriter && null != imageUrl && imageUrl.length() > 0) {
        try {
            LOGGER.info("Adding image" + imageUrl + " to pdf using iText");
            Image pageImage = Image.getInstance(imageUrl);
            float dotsPerPointX = pageImage.getDpiX() / PDF_RESOLUTION;
            float dotsPerPointY = pageImage.getDpiY() / PDF_RESOLUTION;
            PdfContentByte pdfContentByte = pdfWriter.getDirectContent();

            pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY);

            pageImage.setAbsolutePosition(0, 0);

            // Add image to pdf
            pdfWriter.getDirectContentUnder().addImage(pageImage);
            pdfWriter.getDirectContentUnder().add(pdfContentByte);

            // If pdf is to be made searchable
            if (isPdfSearchable) {
                LOGGER.info("Adding invisible text for image: " + imageUrl);
                float pageImagePixelHeight = pageImage.getHeight();
                Font defaultFont = FontFactory.getFont(FontFactory.HELVETICA, 8, Font.BOLD, CMYKColor.BLACK);

                // Fetch text and coordinates for image to be added
                Map<String, int[]> textCoordinatesMap = getTextWithCoordinatesMap(hocrPage, widthOfLine);
                Set<String> ketSet = textCoordinatesMap.keySet();

                // Add text at specific location
                for (String key : ketSet) {
                    int[] coordinates = textCoordinatesMap.get(key);
                    float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX;
                    float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY;
                    pdfContentByte.beginText();

                    // To make text added as invisible
                    pdfContentByte.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE);
                    pdfContentByte.setLineWidth(Math.round(bboxWidthPt));

                    // Ceil is used so that minimum font of any text is 1
                    // For exception of unbalanced beginText() and endText()
                    if (bboxHeightPt > 0.0) {
                        pdfContentByte.setFontAndSize(defaultFont.getBaseFont(),
                                (float) Math.ceil(bboxHeightPt));
                    } else {
                        pdfContentByte.setFontAndSize(defaultFont.getBaseFont(), 1);
                    }
                    float xCoordinate = (float) (coordinates[0] / dotsPerPointX);
                    float yCoordinate = (float) ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY);
                    pdfContentByte.moveText(xCoordinate, yCoordinate);
                    pdfContentByte.showText(key);
                    pdfContentByte.endText();
                }
            }
            pdfContentByte.closePath();
        } catch (BadElementException badElementException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + badElementException.toString());
        } catch (DocumentException documentException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + documentException.toString());
        } catch (MalformedURLException malformedURLException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + malformedURLException.toString());
        } catch (IOException ioException) {
            LOGGER.error("Error occurred while adding image" + imageUrl + " to pdf using Itext: "
                    + ioException.toString());
        }
    }
}

From source file:pdfextract.ExtractInfo.java

/**
 * Walks every object of "vv.pdf" and, for each stream whose subtype is an image, prints the horizontal resolution,
 * height and width of the image to standard output.
 * <p>
 * Any failure is printed as a stack trace and not propagated. The reader is always closed before returning.
 */
public void extractImagesInfo() {
    PdfReader chartReader = null;
    try {
        chartReader = new PdfReader("vv.pdf");
        for (int i = 0; i < chartReader.getXrefSize(); i++) {
            PdfObject pdfobj = chartReader.getPdfObject(i);
            if (pdfobj != null && pdfobj.isStream()) {
                PdfStream stream = (PdfStream) pdfobj;
                PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE);
                if (pdfsubtype != null && pdfsubtype.toString().equals(PdfName.IMAGE.toString())) {
                    // The raw (still encoded) stream bytes are handed to iText to build the image
                    byte[] image = PdfReader.getStreamBytesRaw((PRStream) stream);
                    Image imageObject = Image.getInstance(image);
                    System.out.println("Resolution" + imageObject.getDpiX());
                    System.out.println("Height" + imageObject.getHeight());
                    System.out.println("Width" + imageObject.getWidth());
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the underlying file even when reading or decoding failed part-way through
        if (chartReader != null) {
            chartReader.close();
        }
    }
}

From source file:pl.marcinmilkowski.hocrtopdf.Main.java

License:Open Source License

/**
 * Converts an hOCR file into a PDF: every <code>ocr_page</code> tag becomes one PDF page showing the scanned image
 * named in the tag's title, with the text of each <code>ocrx_word</code> tag placed invisibly at its bounding box.
 * <p>
 * Exits with status -1 on invalid arguments or unreadable input and with status 0 after printing the usage text on
 * request.
 *
 * @param args <code>args[0]</code> is the input hOCR file and <code>args[1]</code> the PDF file to write; passing
 *             <code>--help</code> or <code>-h</code> as first argument prints the usage text
 */
public static void main(String[] args) {
    try {
        // Strings must be compared with equals(); == only compares references and never matched here
        boolean helpRequested = args.length >= 1 && ("--help".equals(args[0]) || "-h".equals(args[0]));
        // Both the input and the output file are required, so fewer than two arguments is a usage error
        if (args.length < 2 || helpRequested) {
            System.out.print("Usage: java pl.marcinmilkowski.hocrtopdf.Main INPUTURL.html OUTPUTURL.pdf\n"
                    + "\n" + "Converts hOCR files into PDF\n" + "\n"
                    + "Example: java pl.marcinmilkowski.hocrtopdf.Main hocr.html output.pdf\n");
            System.exit(helpRequested ? 0 : -1);
        }
        URL inputHOCRFile = null;
        FileOutputStream outputPDFStream = null;
        try {
            File file = new File(args[0]);
            inputHOCRFile = file.toURI().toURL();
        } catch (MalformedURLException e) {
            System.out.println("The first parameter has to be a valid file.");
            System.out.println("We got an error: " + e.getMessage());
            System.exit(-1);
        }
        try {
            outputPDFStream = new FileOutputStream(args[1]);
        } catch (FileNotFoundException e) {
            System.out.println("The second parameter has to be a valid URL");
            System.exit(-1);
        }

        // The resolution of a PDF file (using iText) is 72pt per inch
        float pointsPerInch = 72.0f;

        // Using the jericho library to parse the HTML file
        Source source = new Source(inputHOCRFile);

        int pageCounter = 1;

        Document pdfDocument = null;
        PdfWriter pdfWriter = null;
        PdfContentByte cb = null;
        RandomAccessFileOrArray ra = null;

        // The patterns do not depend on the page, so they are compiled once up front.
        // Extracts the image file name from the title of an ocr_page tag
        Pattern imagePattern = Pattern.compile("image\\s+([^;]+)");
        // In order to place text behind the recognised text snippets we are
        // interested in the bbox property
        Pattern bboxPattern = Pattern.compile("bbox(\\s+\\d+){4}");
        // This pattern separates the coordinates of the bbox property
        Pattern bboxCoordinatePattern = Pattern.compile("(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)");

        // Find the tag of class ocr_page in order to load the scanned image
        StartTag pageTag = source.getNextStartTag(0, "class", OCRPAGE);
        while (pageTag != null) {
            int prevPos = pageTag.getEnd();
            // A missing title attribute is treated like a title without an image name
            String pageTitle = pageTag.getElement().getAttributeValue("title");
            Matcher imageMatcher = imagePattern.matcher(pageTitle == null ? "" : pageTitle);
            if (!imageMatcher.find()) {
                System.out.println("Could not find the image name in the title of the \"ocr_page\" tag, aborting.");
                System.exit(-1);
            }
            // Load the image
            Image pageImage = null;
            try {
                File file = new File(imageMatcher.group(1));
                pageImage = Image.getInstance(file.toURI().toURL());
            } catch (MalformedURLException e) {
                System.out.println("Could not load the scanned image from: " + "file://" + imageMatcher.group(1)
                        + ", aborting.");
                System.exit(-1);
            }
            if (pageImage.getOriginalType() == Image.ORIGINAL_TIFF) {
                // This might be a multipage tiff: pick the page matching the current hOCR page
                File file = new File(imageMatcher.group(1));
                if (pageCounter == 1 || ra == null) {
                    ra = new RandomAccessFileOrArray(file.toURI().toURL());
                }
                int nPages = TiffImage.getNumberOfPages(ra);
                if (nPages > 0 && pageCounter <= nPages) {
                    pageImage = TiffImage.getTiffImage(ra, pageCounter);
                }
            }
            int dpiX = pageImage.getDpiX();
            if (dpiX == 0) { // for images that don't set the resolution we assume
                             // 300 dpi
                dpiX = 300;
            }
            int dpiY = pageImage.getDpiY();
            if (dpiY == 0) { // as above for dpiX
                dpiY = 300;
            }
            float dotsPerPointX = dpiX / pointsPerInch;
            float dotsPerPointY = dpiY / pointsPerInch;
            float pageImagePixelHeight = pageImage.getHeight();
            if (pdfDocument == null) {
                // First page: the document is created with the size of the first image
                pdfDocument = new Document(new Rectangle(pageImage.getWidth() / dotsPerPointX,
                        pageImage.getHeight() / dotsPerPointY));
                pdfWriter = PdfWriter.getInstance(pdfDocument, outputPDFStream);
                pdfDocument.open();
                // Put the text behind the picture (reverse for debugging)
                // cb = pdfWriter.getDirectContentUnder();
                cb = pdfWriter.getDirectContent();
            } else {
                // Later pages: the size has to be set before the new page is started
                pdfDocument.setPageSize(new Rectangle(pageImage.getWidth() / dotsPerPointX,
                        pageImage.getHeight() / dotsPerPointY));
                pdfDocument.newPage();
            }
            // first define a standard font for our text
            BaseFont base = BaseFont.createFont(BaseFont.HELVETICA, BaseFont.CP1250, BaseFont.EMBEDDED);
            Font defaultFont = new Font(base, 8);

            cb.setHorizontalScaling(1.0f);

            pageImage.scaleToFit(pageImage.getWidth() / dotsPerPointX, pageImage.getHeight() / dotsPerPointY);
            pageImage.setAbsolutePosition(0, 0);
            // Put the image in front of the text (reverse for debugging)
            // pdfWriter.getDirectContent().addImage(pageImage);
            pdfWriter.getDirectContentUnder().addImage(pageImage);

            // Walk the tags following this page; only ocrx_word tags produce text
            StartTag ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE);
            while (ocrTag != null) {
                prevPos = ocrTag.getEnd();
                if ("ocrx_word".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) {
                    net.htmlparser.jericho.Element lineElement = ocrTag.getElement();
                    // A word without a title attribute has no bbox and is skipped
                    String wordTitle = lineElement.getAttributeValue("title");
                    Matcher bboxMatcher = bboxPattern.matcher(wordTitle == null ? "" : wordTitle);
                    if (bboxMatcher.find()) {
                        // We found a tag of the ocrx_word class containing a bbox property
                        Matcher bboxCoordinateMatcher = bboxCoordinatePattern.matcher(bboxMatcher.group());
                        bboxCoordinateMatcher.find();
                        int[] coordinates = { Integer.parseInt((bboxCoordinateMatcher.group(1))),
                                Integer.parseInt((bboxCoordinateMatcher.group(2))),
                                Integer.parseInt((bboxCoordinateMatcher.group(3))),
                                Integer.parseInt((bboxCoordinateMatcher.group(4))) };
                        String line = lineElement.getContent().getTextExtractor().toString();
                        float bboxWidthPt = (coordinates[2] - coordinates[0]) / dotsPerPointX;
                        float bboxHeightPt = (coordinates[3] - coordinates[1]) / dotsPerPointY;

                        // Put the text into the PDF
                        cb.beginText();
                        // Comment the next line to debug the PDF output (visible Text)
                        cb.setTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_INVISIBLE);
                        // height
                        cb.setFontAndSize(defaultFont.getBaseFont(), Math.max(Math.round(bboxHeightPt), 1));
                        // width: stretch the text to the bbox; skipped for zero-width text, which would
                        // otherwise divide by zero and write an infinite scaling factor
                        float textWidth = cb.getEffectiveStringWidth(line, false);
                        if (textWidth > 0) {
                            cb.setHorizontalScaling(bboxWidthPt / textWidth);
                        }
                        // Image rows are counted from the top, pdf coordinates from the bottom
                        cb.moveText((coordinates[0] / dotsPerPointX),
                                ((pageImagePixelHeight - coordinates[3]) / dotsPerPointY));
                        cb.showText(line);
                        cb.endText();
                        cb.setHorizontalScaling(1.0f);
                    }
                } else {
                    if ("ocr_page".equalsIgnoreCase(ocrTag.getAttributeValue("class"))) {
                        // Next page reached: continue the outer loop with it
                        pageCounter++;
                        pageTag = ocrTag;
                        break;
                    }
                }
                ocrTag = source.getNextStartTag(prevPos, "class", OCRPAGEORLINE);
            }
            if (ocrTag == null) {
                // No more tags: the last page is complete
                pdfDocument.close();
                break;
            }
        }
    } catch (DocumentException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}