Example usage for org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination PDPageXYZDestination getPageNumber

List of usage examples for org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination PDPageXYZDestination getPageNumber

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination PDPageXYZDestination getPageNumber.

Prototype

public int getPageNumber() 

Source Link

Document

This will get the page number for this destination.

Usage

From source file:com.zilbo.flamingSailor.TE.PDFParser.java

License:Apache License

@Override
protected void endPage(PDPage page) throws IOException {
    super.endPage(page);
    int pieceID = 0;
    Map<String, Map<Integer, Long>> fontCounts = new HashMap<>();
    List<TextPiece> wordsOfThisPage = new ArrayList<>();

    for (List<TextPosition> aCharactersByArticle : charactersByArticle) {
        //   int len = aCharactersByArticle.size();
        for (TextPosition t : aCharactersByArticle) {
            // copy information
            TextPiece w = new TextPiece(pieceID++);
            PDFont font = t.getFont();/*from   ww w.j  ava2 s .co  m*/
            PDFontDescriptor fontDescriptor = font.getFontDescriptor();

            //   w.setFontDescriptor(fontDescriptor);
            if (fontDescriptor == null) {
                w.setFontName("UNKNOWN");
            } else {
                w.setFontName(fontDescriptor.getFontName());
            }

            /*
            * 100: a simple step to fix the font size to the normal range, for those documents in unknown codes that PDFBox can not process now
            */
            if (t.getFontSize() < 0.3 && t.getYScale() <= 1.0) {
                w.setFontSize(t.getFontSize() * 100);
                w.setHeight(Math.max(t.getYScale(), t.getFontSize()) * 100);
                w.setXScale(t.getXScale());
                w.setYScale(t.getYScale());
            } else {
                if (t.getYScale() < 0.3 && t.getFontSize() <= 1.0) {
                    w.setYScale(t.getYScale() * 100);
                    w.setXScale(t.getXScale() * 100);
                    w.setHeight(Math.max(t.getYScale() * 100, t.getFontSize()));
                } else {
                    w.setFontSize(t.getFontSize());
                    w.setHeight(Math.max(t.getYScale(), t.getFontSize()));
                    w.setXScale(t.getXScale());
                    w.setYScale(t.getYScale());
                }
            }

            Map<Integer, Long> counts = fontCounts.get(w.getFontName());
            if (counts == null) {
                counts = new HashMap<>();
                fontCounts.put(w.getFontName(), counts);
            }
            Long count = counts.get((int) Math.round(w.getHeight()));
            if (count == null) {
                count = 1L;
            } else {
                count += 1L;
            }
            counts.put((int) Math.round(w.getHeight()), count);

            w.setWidth(Math.abs(t.getWidth()));
            w.setGeom(t.getX(), t.getY(), w.getWidth(), w.getHeight());

            w.setText(t.getCharacter());

            w.setWidthOfSpace(t.getWidthOfSpace());
            wordsOfThisPage.add(w);
        }
    }
    currentPage.processPage(wordsOfThisPage, fontCounts);
    currentPage.setText(outString.getBuffer().toString());
    outString.getBuffer().setLength(0);
    List<PDAnnotation> annotations = page.getAnnotations();

    for (PDAnnotation annotation : annotations) {
        if (annotation instanceof PDAnnotationLink) {
            PDAnnotationLink l = (PDAnnotationLink) annotation;
            PDRectangle rect = l.getRectangle();
            PDDestination dest = l.getDestination();
            if (dest instanceof PDPageXYZDestination) {
                PDPageXYZDestination xyzDestination = (PDPageXYZDestination) dest;
                PDPage pageDest = ((PDPageXYZDestination) dest).getPage();

                if (rect != null) {
                    if (xyzDestination.getPageNumber() < 0) {
                        int pageNumber = allpages.indexOf(pageDest) + 1;
                        Rectangle2D hotbox = new Rectangle2D.Double(rect.getLowerLeftX(), rect.getLowerLeftY(),
                                (rect.getUpperRightX() - rect.getLowerLeftX()),
                                (rect.getUpperRightY() - rect.getLowerLeftY()));
                        Point2D toPoint = new Point2D.Double(xyzDestination.getLeft(), xyzDestination.getTop());
                        currentPage.addLink(new PDLink(hotbox, pageNumber, toPoint));
                    }
                }
            }
        }
    }

    /*
     The following code is REALLY raw.
     initial testing seemed to show memory leaks, and was REALLY slow.
            
    PDResources r = page.getResources();
    Map<String, PDXObjectImage> images = r.getImages();
    for (Map.Entry<String, PDXObjectImage> e : images.entrySet()) {
    BufferedImage bi = null;
    try {
            
        //   currentPage.addImage(bi);
            
        //    (e.getValue()).write2file("/tmp/II" + e.getKey());
        if (e.getValue() instanceof PDJpeg) {
            PDJpeg jpg = (PDJpeg) e.getValue();
            bi = jpg.getRGBImage();
            ColorSpace cs = bi.getColorModel().getColorSpace();
            File jpgFile = new File("/tmp/II" + e.getKey() + ".jpg");
            
            if (cs instanceof ColorSpaceCMYK) {
            
                logger.info("Ignoring image with CMYK color space");
            } else {
               // ImageIO.write(bi, "jpg", jpgFile);
                jpg.write2file("/tmp/II"+ e.getKey());
            }
            
        } else {
            (e.getValue()).write2file("/tmp/II" + e.getKey());
        }
    } catch (Exception ee) {
        logger.info("can't read image ;-(", ee);
    }
            
    }
    */

    textPageList.add(currentPage);
    currentPage = null;
}