List of usage examples for org.apache.pdfbox.pdmodel.common PDRectangle getUpperRightY
public float getUpperRightY()
From source file:org.elacin.pdfextract.datasource.pdfbox.PDFBoxIntegration.java
License:Apache License
private void correctPosition(@NotNull final PDFont fontObj, final byte[] string, final int i, @NotNull final String c, final float fontSizeText, final float glyphSpaceToTextSpaceFactor, float horizontalScalingText, final int codeLength, @NotNull final ETextPosition text) throws IOException { /**/* w ww . ja v a2s . c o m*/ * Provide precise positioning of glyphs. * * There are several problems right which needs to be worked around: * * 1. Sometimes the PDF will make room for a glyph which belongs to a font with * one or more very tall glyphs by jumping up on the page before drawing. * Since most glyphs are (much) shorter than the tallest one, we need to make * up for that by adjusting the Y coordinate back down. The distance which * is jumped up is embedded in the PDF files, so there is no other way to go * about this. * * 'beforeRoomForGlyph' is the position we were at before the jump back. * Then we need to add spaceOverChar which is my estimate of where the glyph * should begin. the result is kept in 'startY' * * 2. The default height we get might also be too big, so recalculate that based * on character bounding * */ final BoundingBox character = fontObj.getCharacterBoundingBox(string, i, codeLength); PDRectangle fontBB = null; try { fontBB = fontObj.getFontBoundingBox(); } catch (RuntimeException e) { // ignore, this is frequently not implemented } final Rectangle pos = text.getPos(); float adjust = (fontSizeText * horizontalScalingText) / glyphSpaceToTextSpaceFactor; adjust *= getTextMatrix().getXScale(); final Rectangle newPos; if ((character != null) && (fontBB != null) && (character.getHeight() > 0.0f) && (fontBB.getHeight() > 0.0f)) { /* remove the upper and lower bounds filtered away by character */ final float spaceUnderChar = Math.min(fontBB.getLowerLeftY(), character.getLowerLeftY()); final float spaceOverChar = fontBB.getUpperRightY() - character.getUpperRightY(); final float fontHeight = fontBB.getHeight(); /* calculate the upper left corner of the rendered glyph */ float yStart = pos.endY - adjust * fontHeight; yStart += adjust * spaceOverChar; yStart -= adjust * spaceUnderChar; yStart -= pos.height; /* determine start X coordinate. */ final float x; if (isMonoSpacedFont(fontObj)) { x = pos.x; } else { // float leftOfText = text.getX() - (adjust * fontBB.getWidth()); // // x = leftOfText + adjust * character.getLowerLeftX(); x = pos.x; } /* * It was much easier to write the word segmentation code with full font width, * so lets keep that. I havent seen this causing any problems */ float w = pos.width; /* * Line segmentation code was obviously much easier by not having any descenders which * can even overlap into the following line. Math symbols need to stay full length */ final float characterHeight; if (NO_DESCENDERS && (Character.getType(c.charAt(0)) != (int) Character.MATH_SYMBOL)) { characterHeight = character.getUpperRightY(); } else { characterHeight = character.getHeight(); } float h = adjust * (characterHeight); /* correct if the NO_DESCENDERS hack made this character have no height*/ if (NO_DESCENDERS && h < 0.1f) { h = pos.height; } newPos = new Rectangle(x, yStart, w, h); } else { /* * here we have a lot less information, so keep most of what was calculated. Just offset * the Y coordinate */ float h = pos.height; float w = pos.width; float startY = pos.y - h;// * 0.8f; if (fontObj instanceof PDType3Font) { /* * type 3 fonts typically have almost no information * try to mitigate the damage by keeping them small. */ h *= 0.5f; startY += h; /* this is a _very_ quick and dirty hack */ } newPos = new Rectangle(pos.x, startY, w, h); } if (log.isTraceEnabled()) { log.trace("LOG00730:Text " + c + ", " + "pos from " + pos + " to " + newPos); } text.setBaseLine(pos.y); text.setPos(newPos); }
From source file:org.nuxeo.pdf.PDFLinks.java
License:Apache License
protected void loadAndPreflightPdf() throws NuxeoException { if (pdfDoc == null) { pdfDoc = PDFUtils.load(pdfBlob, password); @SuppressWarnings("unchecked") List<PDPage> allPages = pdfDoc.getDocumentCatalog().getAllPages(); try {//from ww w . j a va 2 s.com stripper = new PDFTextStripperByArea(); for (PDPage page : allPages) { List<PDAnnotation> annotations = page.getAnnotations(); for (int j = 0; j < annotations.size(); j++) { PDAnnotation annot = (PDAnnotation) annotations.get(j); if (annot instanceof PDAnnotationLink) { PDAnnotationLink link = (PDAnnotationLink) annot; PDRectangle rect = link.getRectangle(); // need to reposition link rectangle to match text space float x = rect.getLowerLeftX(); float y = rect.getUpperRightY(); float width = rect.getWidth(); float height = rect.getHeight(); int rotation = page.findRotation(); if (rotation == 0) { PDRectangle pageSize = page.findMediaBox(); y = pageSize.getHeight() - y; } else if (rotation == 90) { // do nothing } Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height); stripper.addRegion("" + j, awtRect); } } } } catch (IOException e) { throw new NuxeoException("Cannot prefilght and prepare regions", e); } } }
From source file:org.olat.core.util.pdf.PdfDocument.java
License:Apache License
public PDPage addPage() throws IOException { if (currentContentStream != null) { currentContentStream.close();//from w w w . j a v a 2 s . c o m } PDPage page = new PDPage(PDPage.PAGE_SIZE_A4); document.addPage(page); currentPage = page; currentContentStream = new PDPageContentStream(document, currentPage); PDRectangle mediabox = currentPage.findMediaBox(); width = mediabox.getWidth() - 2 * marginLeftRight; currentY = mediabox.getUpperRightY() - marginTopBottom; return page; }
From source file:org.opencps.util.ExtractTextLocations.java
License:Open Source License
public ExtractTextLocations(String fullPath) throws IOException { PDDocument document = null;// ww w.ja v a 2 s . c o m try { File input = new File(fullPath); document = PDDocument.load(input); if (document.isEncrypted()) { try { document.decrypt(StringPool.BLANK); } catch (Exception e) { _log.error(e); } } // ExtractTextLocations printer = new ExtractTextLocations(); List allPages = document.getDocumentCatalog().getAllPages(); if (allPages != null && allPages.size() > 0) { PDPage page = (PDPage) allPages.get(0); PDStream contents = page.getContents(); if (contents != null) { this.processStream(page, page.findResources(), page.getContents().getStream()); } PDRectangle pageSize = page.findMediaBox(); if (pageSize != null) { setPageWidth(pageSize.getWidth()); setPageHeight(pageSize.getHeight()); setPageLLX(pageSize.getLowerLeftX()); setPageURX(pageSize.getUpperRightX()); setPageLLY(pageSize.getLowerLeftY()); setPageURY(pageSize.getUpperRightY()); } } } catch (Exception e) { _log.error(e); } finally { if (document != null) { document.close(); } } }
From source file:org.paxle.parser.pdf.impl.PdfParser.java
License:Open Source License
/** * A function to extract embedded URIs from the PDF-document. * /*from w w w . j av a 2 s .co m*/ */ protected void extractURLs(IParserDocument parserDoc, PDDocument pddDoc) throws IOException { final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog(); if (pddDocCatalog == null) return; @SuppressWarnings("unchecked") final List<PDPage> allPages = pddDocCatalog.getAllPages(); if (allPages == null || allPages.isEmpty()) return; for (int i = 0; i < allPages.size(); i++) { final PDFTextStripperByArea stripper = new PDFTextStripperByArea(); final PDPage page = (PDPage) allPages.get(i); @SuppressWarnings("unchecked") final List<PDAnnotation> annotations = page.getAnnotations(); if (annotations == null || annotations.isEmpty()) return; //first setup text extraction regions for (int j = 0; j < annotations.size(); j++) { final PDAnnotation annot = (PDAnnotation) annotations.get(j); if (annot instanceof PDAnnotationLink) { final PDAnnotationLink link = (PDAnnotationLink) annot; final PDRectangle rect = link.getRectangle(); //need to reposition link rectangle to match text space float x = rect.getLowerLeftX(); float y = rect.getUpperRightY(); float width = rect.getWidth(); float height = rect.getHeight(); int rotation = page.findRotation(); if (rotation == 0) { PDRectangle pageSize = page.findMediaBox(); y = pageSize.getHeight() - y; } else if (rotation == 90) { //do nothing } Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height); stripper.addRegion("" + j, awtRect); } } stripper.extractRegions(page); for (int j = 0; j < annotations.size(); j++) { final PDAnnotation annot = (PDAnnotation) annotations.get(j); if (annot instanceof PDAnnotationLink) { final PDAnnotationLink link = (PDAnnotationLink) annot; final PDAction action = link.getAction(); final String urlText = stripper.getTextForRegion("" + j); if (action instanceof PDActionURI) { final PDActionURI embeddedUri = (PDActionURI) action; final URI temp = URI.create(embeddedUri.getURI()); parserDoc.addReference(temp, urlText, Constants.SERVICE_PID + ":" + PID); } } } } }
From source file:org.polarsys.kitalpha.doc.doc2model.tikaparsing.pdf.Doc2ModelTextStripper.java
License:Apache License
/** * This will process a TextPosition object and add the * text to the list of characters on a page. It takes care of * overlapping text.//ww w .j a v a 2 s .c o m * * @param text The text to process. */ protected void processTextPosition(TextPosition rawtext) { StylizedTextPosition text = (StylizedTextPosition) processStyle(rawtext); boolean showCharacter = true; if (suppressDuplicateOverlappingText) { showCharacter = false; String textCharacter = text.getCharacter(); float textX = text.getX(); float textY = text.getY(); List<TextPosition> sameTextCharacters = (List<TextPosition>) characterListMapping.get(textCharacter); if (sameTextCharacters == null) { sameTextCharacters = new ArrayList<TextPosition>(); characterListMapping.put(textCharacter, sameTextCharacters); } // RDD - Here we compute the value that represents the end of the rendered // text. This value is used to determine whether subsequent text rendered // on the same line overwrites the current text. // // We subtract any positive padding to handle cases where extreme amounts // of padding are applied, then backed off (not sure why this is done, but there // are cases where the padding is on the order of 10x the character width, and // the TJ just backs up to compensate after each character). Also, we subtract // an amount to allow for kerning (a percentage of the width of the last // character). // boolean suppressCharacter = false; float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f; for (int i = 0; i < sameTextCharacters.size() && textCharacter != null; i++) { TextPosition character = sameTextCharacters.get(i); String charCharacter = character.getCharacter(); float charX = character.getX(); float charY = character.getY(); //only want to suppress if (charCharacter != null && //charCharacter.equals( textCharacter ) && within(charX, textX, tolerance) && within(charY, textY, tolerance)) { suppressCharacter = true; } } if (!suppressCharacter) { sameTextCharacters.add(text); showCharacter = true; } } if (showCharacter) { //if we are showing the character then we need to determine which //article it belongs to. int foundArticleDivisionIndex = -1; int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1; int notFoundButFirstLeftArticleDivisionIndex = -1; int notFoundButFirstAboveArticleDivisionIndex = -1; float x = text.getX(); float y = text.getY(); if (shouldSeparateByBeads) { for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) { PDThreadBead bead = (PDThreadBead) pageArticles.get(i); if (bead != null) { PDRectangle rect = bead.getRectangle(); if (rect.contains(x, y)) { foundArticleDivisionIndex = i * 2 + 1; } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY()) && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) { notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2; } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) { notFoundButFirstLeftArticleDivisionIndex = i * 2; } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) { notFoundButFirstAboveArticleDivisionIndex = i * 2; } } else { foundArticleDivisionIndex = 0; } } } else { foundArticleDivisionIndex = 0; } int articleDivisionIndex = -1; if (foundArticleDivisionIndex != -1) { articleDivisionIndex = foundArticleDivisionIndex; } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex; } else if (notFoundButFirstLeftArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex; } else if (notFoundButFirstAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex; } else { articleDivisionIndex = charactersByArticle.size() - 1; } List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get(articleDivisionIndex); /* In the wild, some PDF encoded documents put diacritics (accents on * top of characters) into a separate Tj element. When displaying them * graphically, the two chunks get overlayed. With text output though, * we need to do the overlay. This code recombines the diacritic with * its associated character if the two are consecutive. */ if (textList.isEmpty()) { textList.add(text); } else { /* test if we overlap the previous entry. * Note that we are making an assumption that we need to only look back * one TextPosition to find what we are overlapping. * This may not always be true. */ TextPosition previousTextPosition = (TextPosition) textList.get(textList.size() - 1); if (text.isDiacritic() && previousTextPosition.contains(text)) { previousTextPosition.mergeDiacritic(text, normalize); } /* If the previous TextPosition was the diacritic, merge it into this * one and remove it from the list. */ else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) { text.mergeDiacritic(previousTextPosition, normalize); textList.remove(textList.size() - 1); textList.add(text); } else { textList.add(text); } } } }
From source file:org.xwiki.test.misc.PDFTest.java
License:Open Source License
/** * Code adapted from http://www.docjar.com/html/api/org/apache/pdfbox/examples/pdmodel/PrintURLs.java.html *//*from w ww .jav a 2 s .com*/ private Map<String, PDAction> extractLinks(PDPage page) throws Exception { Map<String, PDAction> links = new HashMap<String, PDAction>(); PDFTextStripperByArea stripper = new PDFTextStripperByArea(); List<PDAnnotation> annotations = page.getAnnotations(); // First setup the text extraction regions. for (int j = 0; j < annotations.size(); j++) { PDAnnotation annotation = annotations.get(j); if (annotation instanceof PDAnnotationLink) { PDAnnotationLink link = (PDAnnotationLink) annotation; PDRectangle rect = link.getRectangle(); // Need to reposition link rectangle to match text space. float x = rect.getLowerLeftX(); float y = rect.getUpperRightY(); float width = rect.getWidth(); float height = rect.getHeight(); int rotation = page.getRotation(); if (rotation == 0) { PDRectangle pageSize = page.getMediaBox(); y = pageSize.getHeight() - y; } else if (rotation == 90) { // Do nothing. } Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height); stripper.addRegion(String.valueOf(j), awtRect); } } stripper.extractRegions(page); for (int j = 0; j < annotations.size(); j++) { PDAnnotation annotation = annotations.get(j); if (annotation instanceof PDAnnotationLink) { PDAnnotationLink link = (PDAnnotationLink) annotation; String label = stripper.getTextForRegion(String.valueOf(j)).trim(); links.put(label, link.getAction()); } } return links; }
From source file:paper2ebook.Transformer.java
License:Apache License
/** * Heuristic search of the list of interesting areas in page, returned by * natural read order.//from w ww . j a v a 2 s . com */ public List<PDRectangle> getFragments(PDPage page) { List<PDRectangle> fragments = new ArrayList<PDRectangle>(); // TODO: naive 2 columns hack: rewrite me to introspect the document // structure instead PDRectangle origBox = page.findCropBox(); float width = origBox.getWidth(); float height = origBox.getHeight(); // top left PDRectangle box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX()); box.setLowerLeftY(origBox.getLowerLeftY() + height / 2); box.setUpperRightX(origBox.getUpperRightX() / 2); box.setUpperRightY(origBox.getUpperRightY()); fragments.add(box); // bottom left box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX()); box.setLowerLeftY(origBox.getLowerLeftY()); box.setUpperRightX(origBox.getUpperRightX() / 2); box.setUpperRightY(origBox.getUpperRightY() / 2); fragments.add(box); // top right box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX() + width / 2); box.setLowerLeftY(origBox.getLowerLeftY() + height / 2); box.setUpperRightX(origBox.getUpperRightX()); box.setUpperRightY(origBox.getUpperRightY()); fragments.add(box); // bottom right box = new PDRectangle(); box.setLowerLeftX(origBox.getLowerLeftX() + width / 2); box.setLowerLeftY(origBox.getLowerLeftY()); box.setUpperRightX(origBox.getUpperRightX()); box.setUpperRightY(origBox.getUpperRightY() / 2); fragments.add(box); return fragments; }
From source file:PDF.RotatePDF.java
private void transformPage(PDDocument document, PDPage page, AffineTransform at) throws IOException, COSVisitorException { PDRectangle cropBox = page.findCropBox(); float xOffset = (cropBox.getUpperRightX() + cropBox.getLowerLeftX()) / 2f; float yOffset = (cropBox.getUpperRightY() + cropBox.getLowerLeftY()) / 2f; AffineTransform transform = AffineTransform.getTranslateInstance(xOffset, yOffset); transform.concatenate(at);//from w w w . ja va 2 s . c o m transform.concatenate(AffineTransform.getTranslateInstance(-xOffset, -yOffset)); PDPageContentStream stream = new PDPageContentStream(document, page, true, false); stream.concatenate2CTM(transform); stream.close(); COSBase contents = page.getCOSDictionary().getDictionaryObject(COSName.CONTENTS); if (contents instanceof COSStreamArray) { COSStreamArray contentsArray = (COSStreamArray) contents; COSArray newArray = new COSArray(); newArray.add(contentsArray.get(contentsArray.getStreamCount() - 1)); for (int i = 0; i < contentsArray.getStreamCount() - 1; i++) { newArray.add(contentsArray.get(i)); } COSStreamArray newStreamArray = new COSStreamArray(newArray); page.getCOSDictionary().setItem(COSName.CONTENTS, newStreamArray); } }
From source file:so.rezervacija.StampajRezervaciju.java
@Override protected void izvrsiKonkretnuOperaciju() throws Exception { PDDocument doc = null;//from w w w. j a va 2 s .com PDPage page = null; try { doc = new PDDocument(); page = new PDPage(); doc.addPage(page); PDFont pdfFont = PDType1Font.HELVETICA_BOLD; float fontSize = 25; float leading = 1.5f * fontSize; PDPageContentStream contentStream = new PDPageContentStream(doc, page); PDRectangle mediabox = page.findMediaBox(); float margin = 72; float width = mediabox.getWidth() - 2 * margin; float startX = mediabox.getLowerLeftX() + margin; float startY = mediabox.getUpperRightY() - margin; String text = "Izvrsili ste rezervaciju za tretman " + r.getTretman() + ", vreme rezervacije:" + new SimpleDateFormat("YYYY-MM-dd HH:mm").format(r.getVreme()) + " zaposleni koji ce vrsiti tretman:" + r.getZaposleni().getImePrezime(); List<String> lines = new ArrayList<String>(); int lastSpace = -1; while (text.length() > 0) { int spaceIndex = text.indexOf(' ', lastSpace + 1); if (spaceIndex < 0) { lines.add(text); text = ""; } else { String subString = text.substring(0, spaceIndex); float size = fontSize * pdfFont.getStringWidth(subString) / 1000; if (size > width) { if (lastSpace < 0) // So we have a word longer than the line... draw it anyways { lastSpace = spaceIndex; } subString = text.substring(0, lastSpace); lines.add(subString); text = text.substring(lastSpace).trim(); lastSpace = -1; } else { lastSpace = spaceIndex; } } } contentStream.beginText(); contentStream.setFont(pdfFont, fontSize); contentStream.moveTextPositionByAmount(startX, startY); for (String line : lines) { contentStream.drawString(line); contentStream.moveTextPositionByAmount(0, -leading); } contentStream.endText(); contentStream.close(); doc.save("PotvrdaRezervacije.pdf"); if (Desktop.isDesktopSupported()) { try { File myFile = new File("../ServerProjekat/PotvrdaRezervacije.pdf"); Desktop.getDesktop().open(myFile); } catch (IOException ex) { // no application registered for PDFs } } doc.close(); } catch (Exception e) { System.out.println(e); } }