List of usage examples for org.apache.pdfbox.pdmodel.common PDRectangle getLowerLeftY
public float getLowerLeftY()
From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java
License:Apache License
private void correctPosition(@NotNull final PDFont fontObj, final byte[] string, final int i, @NotNull final String c, final float fontSizeText, final float glyphSpaceToTextSpaceFactor, float horizontalScalingText, final int codeLength, @NotNull final ETextPosition text) throws IOException { /**/*from w w w . jav a 2 s . co m*/ * Provide precise positioning of glyphs. * * There are several problems right which needs to be worked around: * * 1. Sometimes the PDF will make room for a glyph which belongs to a font with * one or more very tall glyphs by jumping up on the page before drawing. * Since most glyphs are (much) shorter than the tallest one, we need to make * up for that by adjusting the Y coordinate back down. The distance which * is jumped up is embedded in the PDF files, so there is no other way to go * about this. * * 'beforeRoomForGlyph' is the position we were at before the jump back. * Then we need to add spaceOverChar which is my estimate of where the glyph * should begin. the result is kept in 'startY' * * 2. 
The default height we get might also be too big, so recalculate that based * on character bounding * */ final BoundingBox character = fontObj.getCharacterBoundingBox(string, i, codeLength); PDRectangle fontBB = null; try { fontBB = fontObj.getFontBoundingBox(); } catch (RuntimeException e) { // ignore, this is frequently not implemented } final Rectangle pos = text.getPos(); float adjust = (fontSizeText * horizontalScalingText) / glyphSpaceToTextSpaceFactor; adjust *= getTextMatrix().getXScale(); final Rectangle newPos; if ((character != null) && (fontBB != null) && (character.getHeight() > 0.0f) && (fontBB.getHeight() > 0.0f)) { /* remove the upper and lower bounds filtered away by character */ final float spaceUnderChar = Math.min(fontBB.getLowerLeftY(), character.getLowerLeftY()); final float spaceOverChar = fontBB.getUpperRightY() - character.getUpperRightY(); final float fontHeight = fontBB.getHeight(); /* calculate the upper left corner of the rendered glyph */ float yStart = pos.endY - adjust * fontHeight; yStart += adjust * spaceOverChar; yStart -= adjust * spaceUnderChar; yStart -= pos.height; /* determine start X coordinate. */ final float x; if (isMonoSpacedFont(fontObj)) { x = pos.x; } else { // float leftOfText = text.getX() - (adjust * fontBB.getWidth()); // // x = leftOfText + adjust * character.getLowerLeftX(); x = pos.x; } /* * It was much easier to write the word segmentation code with full font width, * so lets keep that. I havent seen this causing any problems */ float w = pos.width; /* * Line segmentation code was obviously much easier by not having any descenders which * can even overlap into the following line. 
Math symbols need to stay full length */ final float characterHeight; if (NO_DESCENDERS && (Character.getType(c.charAt(0)) != (int) Character.MATH_SYMBOL)) { characterHeight = character.getUpperRightY(); } else { characterHeight = character.getHeight(); } float h = adjust * (characterHeight); /* correct if the NO_DESCENDERS hack made this character have no height*/ if (NO_DESCENDERS && h < 0.1f) { h = pos.height; } newPos = new Rectangle(x, yStart, w, h); } else { /* * here we have a lot less information, so keep most of what was calculated. Just offset * the Y coordinate */ float h = pos.height; float w = pos.width; float startY = pos.y - h;// * 0.8f; if (fontObj instanceof PDType3Font) { /* * type 3 fonts typically have almost no information * try to mitigate the damage by keeping them small. */ h *= 0.5f; startY += h; /* this is a _very_ quick and dirty hack */ } newPos = new Rectangle(pos.x, startY, w, h); } if (log.isTraceEnabled()) { log.trace("LOG00730:Text " + c + ", " + "pos from " + pos + " to " + newPos); } text.setBaseLine(pos.y); text.setPos(newPos); }
From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java
License:Apache License
/**
 * Processes the contents of a page, provided the current page number lies between
 * {@code startPage} and {@code endPage} (both inclusive); other pages are skipped.
 *
 * <p>The collected characters are filtered, converted and added to {@code docContent}
 * together with the graphics drawn during parsing and the dimensions of the media box.
 *
 * @param page    The page to process.
 * @param content The contents of the page.
 * @throws IOException If there is an error processing the page.
 */
protected void processPage(@NotNull PDPage page, COSStream content) throws IOException {
    if ((currentPageNo < startPage) || (currentPageNo > endPage)) {
        return;
    }

    /* show which page we are working on in the log */
    MDC.put("page", currentPageNo);
    try {
        charactersForPage.clear();
        characterListMapping.clear();
        pageSize = page.findCropBox().createDimension();
        rotation = (float) page.findRotation();

        /* this is used to 'draw' images on during pdf parsing */
        graphicsDrawer.clearSurface();
        setGraphicsState(null);
        resetEngine();
        processStream(page, page.findResources(), content);
        filterOutBadFonts(charactersForPage);

        /* filter out remaining definite bad characters */
        filterOutControlCodes(charactersForPage);

        List<PhysicalText> texts = new ArrayList<PhysicalText>(charactersForPage.size());
        for (ETextPosition tp : charactersForPage) {
            texts.add(tp.convertText(fonts));
        }

        final PDRectangle mediaBox = page.findMediaBox();
        Rectangle dimensions = new Rectangle(mediaBox.getLowerLeftX(), mediaBox.getLowerLeftY(),
                mediaBox.getWidth(), mediaBox.getHeight());

        PageContent thisPage = new PageContent(texts, graphicsDrawer.getGraphicContents(),
                currentPageNo, dimensions);
        docContent.addPage(thisPage);
    } finally {
        /* always drop the page marker, so a failing page does not label later log lines */
        MDC.remove("page");
    }
}
From source file:org.fit.pdfdom.PDFBoxTree.java
License:Open Source License
/**
 * Builds the transformation for the current page from its crop box and rotation.
 *
 * <p>The steps are concatenated in this order: a translation that depends on the rotation
 * (90, 180 or 270; any other value adds none), the rotation itself, a vertical flip over the
 * crop box height, and finally a translation by the negated lower left corner of the crop box.
 *
 * @return the newly created page transformation
 */
protected AffineTransform createCurrentPageTransformation() {
    final PDRectangle cropBox = pdpage.getCropBox();
    final int rotation = pdpage.getRotation();
    final AffineTransform transform = new AffineTransform();

    /* shift the page so that it stays in view once it has been rotated */
    if (rotation == 90) {
        transform.translate(cropBox.getHeight(), 0);
    } else if (rotation == 180) {
        transform.translate(cropBox.getWidth(), cropBox.getHeight());
    } else if (rotation == 270) {
        transform.translate(0, cropBox.getWidth());
    }
    transform.rotate(Math.toRadians(rotation));

    /* flip the y axis */
    transform.translate(0, cropBox.getHeight());
    transform.scale(1, -1);

    /* move the lower left corner of the crop box to the origin */
    transform.translate(-cropBox.getLowerLeftX(), -cropBox.getLowerLeftY());
    return transform;
}
From source file:org.nuxeo.pdf.PDFPageNumbering.java
License:Open Source License
/** * Add page numbers and returns a <i>new</i> Blob. Original blob is not * modified. This code assumes:/*from ww w . j av a2 s .com*/ * <ul> * <li>There is no page numbers already (it always draw the numbers)</li> * <li>The pdf is not rotated</li> * <li>Default values apply: * <ul> * <li><code>inStartAtPage</code> and <code>inStartAtNumber</code> are set * to 1 if they are passed < 1.</li> * <li>If <code>inStartAtPage</code> is > number of pages it also is reset * to 1</li> * <li><code>inFontName</code> is set to "Helvetica" if "" or null</li> * <li><code>inFontSize</code> is <= 0, it is set to 16</li> * <li><code>inHex255Color</code> is set to black if "", null or if its * length < 6. Expected format is 0xrrggbb, #rrggbb or just rrggbb</li> * <li><code>inPosition</code> is set to <code>BOTTOM_RIGHT</code> if null</li> * </ul> * </li> * <li></li> * </ul> * * @param inBlob * @param inStartAtPage * @param inStartAtNumber * @param inFontName * @param inFontSize * @param inHex255Color * @param inPosition * @return Blob * @throws IOException * @throws COSVisitorException * * @since 5.9.5 */ public Blob addPageNumbers(int inStartAtPage, int inStartAtNumber, String inFontName, float inFontSize, String inHex255Color, PAGE_NUMBER_POSITION inPosition) throws IOException, COSVisitorException { Blob result = null; PDDocument doc = null; inStartAtPage = inStartAtPage < 1 ? 1 : inStartAtPage; int pageNumber = inStartAtNumber < 1 ? 1 : inStartAtNumber; inFontSize = inFontSize <= 0 ? DEFAULT_FONT_SIZE : inFontSize; int[] rgb = PDFUtils.hex255ToRGB(inHex255Color); try { doc = PDDocument.load(blob.getStream()); List<?> allPages; PDFont font; int max; if (inFontName == null || inFontName.isEmpty()) { font = PDType1Font.HELVETICA; } else { font = PDType1Font.getStandardFont(inFontName); if (font == null) { font = new PDType1Font(inFontName); } } allPages = doc.getDocumentCatalog().getAllPages(); max = allPages.size(); inStartAtPage = inStartAtPage > max ? 
1 : inStartAtPage; for (int i = inStartAtPage; i <= max; i++) { String pageNumAsStr = "" + pageNumber; pageNumber += 1; PDPage page = (PDPage) allPages.get(i - 1); PDPageContentStream footercontentStream = new PDPageContentStream(doc, page, true, true); float stringWidth = font.getStringWidth(pageNumAsStr) * inFontSize / 1000f; float stringHeight = font.getFontDescriptor().getFontBoundingBox().getHeight() * inFontSize / 1000; PDRectangle pageRect = page.findMediaBox(); float xMoveAmount, yMoveAmount; if (inPosition == null) { inPosition = PAGE_NUMBER_POSITION.BOTTOM_RIGHT; } switch (inPosition) { case BOTTOM_LEFT: xMoveAmount = 10; yMoveAmount = pageRect.getLowerLeftY() + 10; break; case BOTTOM_CENTER: xMoveAmount = (pageRect.getUpperRightX() / 2) - (stringWidth / 2); yMoveAmount = pageRect.getLowerLeftY() + 10; break; case TOP_LEFT: xMoveAmount = 10; yMoveAmount = pageRect.getHeight() - stringHeight - 10; break; case TOP_CENTER: xMoveAmount = (pageRect.getUpperRightX() / 2) - (stringWidth / 2); yMoveAmount = pageRect.getHeight() - stringHeight - 10; break; case TOP_RIGHT: xMoveAmount = pageRect.getUpperRightX() - 10 - stringWidth; yMoveAmount = pageRect.getHeight() - stringHeight - 10; break; // Bottom-right is the default default: xMoveAmount = pageRect.getUpperRightX() - 10 - stringWidth; yMoveAmount = pageRect.getLowerLeftY() + 10; break; } footercontentStream.beginText(); footercontentStream.setFont(font, inFontSize); footercontentStream.moveTextPositionByAmount(xMoveAmount, yMoveAmount); footercontentStream.setNonStrokingColor(rgb[0], rgb[1], rgb[2]); footercontentStream.drawString(pageNumAsStr); footercontentStream.endText(); footercontentStream.close(); } File tempFile = File.createTempFile("pdfutils-", ".pdf"); doc.save(tempFile); result = new FileBlob(tempFile); Framework.trackFile(tempFile, result); } finally { if (doc != null) { doc.close(); } } return result; }
From source file:org.opencps.util.ExtractTextLocations.java
License:Open Source License
public ExtractTextLocations(String fullPath) throws IOException { PDDocument document = null;//from w w w. j av a 2 s . co m try { File input = new File(fullPath); document = PDDocument.load(input); if (document.isEncrypted()) { try { document.decrypt(StringPool.BLANK); } catch (Exception e) { _log.error(e); } } // ExtractTextLocations printer = new ExtractTextLocations(); List allPages = document.getDocumentCatalog().getAllPages(); if (allPages != null && allPages.size() > 0) { PDPage page = (PDPage) allPages.get(0); PDStream contents = page.getContents(); if (contents != null) { this.processStream(page, page.findResources(), page.getContents().getStream()); } PDRectangle pageSize = page.findMediaBox(); if (pageSize != null) { setPageWidth(pageSize.getWidth()); setPageHeight(pageSize.getHeight()); setPageLLX(pageSize.getLowerLeftX()); setPageURX(pageSize.getUpperRightX()); setPageLLY(pageSize.getLowerLeftY()); setPageURY(pageSize.getUpperRightY()); } } } catch (Exception e) { _log.error(e); } finally { if (document != null) { document.close(); } } }
From source file:org.xmlcml.pdf2svg.PDFPage2SVGConverter.java
License:Apache License
/** * DUPLICATE OF SUPER SO WE CAN DEBUG//from w w w.jav a2 s .c o m * This will draw the page to the requested context. * * @param g The graphics context to draw onto. * @param p The page to draw. * @param pageDimension The size of the page to draw. * * @throws IOException If there is an IO error while drawing the page. */ public void drawPage(Graphics g, PDPage p, Dimension pageDimension) throws IOException { super.drawPage(g, p, pageDimension); // cannot use this because private // graphics = (Graphics2D)g; Graphics2D g2d = (Graphics2D) g; // g2d = (Graphics2D)g; page = p; pageSize = pageDimension; g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); g2d.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON); // Only if there is some content, we have to process it. // Otherwise we are done here and we will produce an empty page if (page.getContents() != null) { PDResources resources = page.findResources(); processStream(page, resources, page.getContents().getStream()); } List annotations = page.getAnnotations(); if (annotations.size() > 0) { throw new RuntimeException("ANNOTATIONS"); } for (int i = 0; i < annotations.size(); i++) { PDAnnotation annot = (PDAnnotation) annotations.get(i); PDRectangle rect = annot.getRectangle(); String appearanceName = annot.getAppearanceStream(); PDAppearanceDictionary appearDictionary = annot.getAppearance(); if (appearDictionary != null) { if (appearanceName == null) { appearanceName = "default"; } Map appearanceMap = appearDictionary.getNormalAppearance(); if (appearanceMap != null) { PDAppearanceStream appearance = (PDAppearanceStream) appearanceMap.get(appearanceName); if (appearance != null) { g.translate((int) rect.getLowerLeftX(), (int) -rect.getLowerLeftY()); processSubStream(page, appearance.getResources(), appearance.getStream()); g.translate((int) -rect.getLowerLeftX(), (int) +rect.getLowerLeftY()); } } } } }
From source file:paper2ebook.Transformer.java
License:Apache License
/** * Heuristic search of the list of interesting areas in page, returned by * natural read order.//from ww w .ja va 2s . c o m */ public List<PDRectangle> getFragments(PDPage page) { List<PDRectangle> fragments = new ArrayList<PDRectangle>(); // TODO: naive 2 columns hack: rewrite me to introspect the document // structure instead PDRectangle origBox = page.findCropBox(); float width = origBox.getWidth(); float height = origBox.getHeight(); // top left PDRectangle box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX()); box.setLowerLeftY(origBox.getLowerLeftY() + height / 2); box.setUpperRightX(origBox.getUpperRightX() / 2); box.setUpperRightY(origBox.getUpperRightY()); fragments.add(box); // bottom left box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX()); box.setLowerLeftY(origBox.getLowerLeftY()); box.setUpperRightX(origBox.getUpperRightX() / 2); box.setUpperRightY(origBox.getUpperRightY() / 2); fragments.add(box); // top right box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX() + width / 2); box.setLowerLeftY(origBox.getLowerLeftY() + height / 2); box.setUpperRightX(origBox.getUpperRightX()); box.setUpperRightY(origBox.getUpperRightY()); fragments.add(box); // bottom right box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX() + width / 2); box.setLowerLeftY(origBox.getLowerLeftY()); box.setUpperRightX(origBox.getUpperRightX()); box.setUpperRightY(origBox.getUpperRightY() / 2); fragments.add(box); return fragments; }
From source file:PDF.RotatePDF.java
/**
 * Applies a transformation to the whole content of a page, around the center of its crop
 * box.
 *
 * <p>The transformation is written into a new content stream which is appended to the page.
 * When the page contents are a stream array, the last stream of that array is then moved to
 * the front, so that the transformation is set up before the existing content is drawn.
 *
 * @param document the document owning the page
 * @param page     the page to transform
 * @param at       the transformation to apply around the center of the crop box
 * @throws IOException         if the content stream cannot be written
 * @throws COSVisitorException declared by the signature
 */
private void transformPage(PDDocument document, PDPage page, AffineTransform at)
        throws IOException, COSVisitorException {
    PDRectangle cropBox = page.findCropBox();
    float xOffset = (cropBox.getUpperRightX() + cropBox.getLowerLeftX()) / 2f;
    float yOffset = (cropBox.getUpperRightY() + cropBox.getLowerLeftY()) / 2f;

    // move the center to the origin, apply the transformation, move the center back
    AffineTransform transform = AffineTransform.getTranslateInstance(xOffset, yOffset);
    transform.concatenate(at);
    transform.concatenate(AffineTransform.getTranslateInstance(-xOffset, -yOffset));

    PDPageContentStream stream = new PDPageContentStream(document, page, true, false);
    try {
        stream.concatenate2CTM(transform);
    } finally {
        // release the stream even when writing the matrix fails
        stream.close();
    }

    COSBase contents = page.getCOSDictionary().getDictionaryObject(COSName.CONTENTS);
    if (contents instanceof COSStreamArray) {
        COSStreamArray contentsArray = (COSStreamArray) contents;
        int lastIndex = contentsArray.getStreamCount() - 1;

        // rotate the array: the last stream first, then all the others in their order
        COSArray newArray = new COSArray();
        newArray.add(contentsArray.get(lastIndex));
        for (int i = 0; i < lastIndex; i++) {
            newArray.add(contentsArray.get(i));
        }

        COSStreamArray newStreamArray = new COSStreamArray(newArray);
        page.getCOSDictionary().setItem(COSName.CONTENTS, newStreamArray);
    }
}
From source file:se.streamsource.streamflow.web.application.pdf.PdfDocument.java
License:Apache License
public PDDocument generateHeaderAndPageNumbers(PdfFont font, String... headers) { try {//from w ww . j a v a 2 s . c o m int pageTotal = pdf.getNumberOfPages(); int pageCount = 1; float stringWidth = 0.0f; float positionX = 0.0f; for (Object o : pdf.getDocumentCatalog().getAllPages()) { String numbering = "" + pageCount + " (" + pageTotal + ")"; PDPage page = (PDPage) o; PDRectangle pageSize = page.findMediaBox(); float positionY = pageSize.getHeight() - headerMargin + font.height; PDPageContentStream stream = new PDPageContentStream(pdf, page, true, true); stream.beginText(); stream.setFont(font.font, font.size); stream.moveTextPositionByAmount(0, positionY); for (String header : headers) { stringWidth = font.font.getStringWidth(header); positionX = (pageSize.getWidth() - rightMargin - (stringWidth * font.size) / 1000f); stream.moveTextPositionByAmount(positionX, 0); stream.drawString(header); stream.moveTextPositionByAmount(-positionX, -font.height); positionY -= font.height; } stringWidth = font.font.getStringWidth(numbering); positionX = (pageSize.getWidth() - rightMargin - (stringWidth * font.size) / 1000f); stream.moveTo(pageSize.getLowerLeftX(), pageSize.getLowerLeftY()); stream.moveTextPositionByAmount(positionX, 30 - positionY); stream.drawString(numbering); stream.endText(); stream.close(); pageCount++; } } catch (IOException ioe) { close(); } return closeAndReturn(); }
From source file:uk.ac.leeds.ccg.andyt.rdl.web.RDL_ParsePDF.java
/** * Converts PDF to a String a page at a time. * * @param f//from w w w . j ava 2s. co m * @return * @throws IOException */ public static String parseToString(File f) throws IOException { String result; result = ""; PDDocument doc = PDDocument.load(f); PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); //Rectangle rect = new Rectangle(10, 280, 275, 60); //PDPage firstPage = doc.getPage(0); for (PDPage page : doc.getPages()) { PDRectangle aPDRectangle; aPDRectangle = page.getBBox(); Rectangle2D.Double rect = new Rectangle2D.Double(aPDRectangle.getLowerLeftX(), aPDRectangle.getLowerLeftY(), //aPDRectangle.getUpperRightY(), aPDRectangle.getWidth(), aPDRectangle.getHeight()); stripper.addRegion("class1", rect); stripper.extractRegions(page); System.out.println("<Text in the area:" + rect + ">"); String text; text = stripper.getTextForRegion("class1"); System.out.println(text); System.out.println("</Text in the area:" + rect + ">"); result += text; } return result; }