Usage examples for org.apache.pdfbox.pdmodel.common.PDRectangle.getLowerLeftX()
public float getLowerLeftX()
From source file:com.formkiq.core.service.generator.pdfbox.TextSearchAreaFilterDefault.java
License:Apache License
/** * Calculate Horziontal Matching Rectangle by field type. * * @param page {@link PDPage}/*ww w . j a va 2 s .c om*/ * @param field {@link PDField} * @param w {@link PDAnnotationWidget} * @return {@link PDRectangle} */ private PDRectangle getHorizontalRectangle(final PDPage page, final PDField field, final PDAnnotationWidget w) { PDRectangle wrect = w.getRectangle(); PDRectangle rect = new PDRectangle(0, wrect.getLowerLeftY(), wrect.getWidth() + wrect.getLowerLeftX(), wrect.getHeight()); if (field instanceof PDCheckBox) { rect = new PDRectangle(0, wrect.getLowerLeftY(), page.getMediaBox().getWidth(), wrect.getHeight()); } return rect; }
From source file:com.formkiq.core.service.generator.pdfbox.TextSearchAreaFilterDefault.java
License:Apache License
/** * Calculate Vertical Matching Rectangle by field type. * * @param field {@link PDField}/*from w w w.j a v a 2 s. c o m*/ * @param w {@link PDAnnotationWidget} * @return {@link PDRectangle} */ private PDRectangle getVerticalRectangle(final PDField field, final PDAnnotationWidget w) { PDRectangle rect = null; if (!(field instanceof PDCheckBox)) { PDRectangle wrect = w.getRectangle(); float addition = 2 * wrect.getHeight(); rect = new PDRectangle(wrect.getLowerLeftX() - addition, wrect.getLowerLeftY(), wrect.getWidth() + addition, wrect.getHeight() + addition); } return rect; }
From source file:com.formkiq.core.service.generator.pdfbox.TextSearchAreaFilterInsideLines.java
License:Apache License
@Override public List<PDFieldSearchRectangle> getTextSearchArea(final PDPage page, final PDField pdField, final PDAnnotationWidget widget, final List<PDRectangle> lineRects) { List<PDFieldSearchRectangle> area = new ArrayList<>(1); PDRectangle wrect = widget.getRectangle(); List<PDRectangle> leftlist = new ArrayList<>(); List<PDRectangle> rightlist = new ArrayList<>(); List<PDRectangle> toplist = new ArrayList<>(); List<PDRectangle> bottomlist = new ArrayList<>(); for (PDRectangle line : lineRects) { if (line.getLowerLeftY() < wrect.getLowerLeftY() && wrect.getUpperRightY() < line.getUpperRightY()) { if (line.getUpperRightX() < wrect.getLowerLeftX()) { leftlist.add(line);//from w ww .j a v a 2s .c o m } else if (wrect.getUpperRightX() < line.getLowerLeftX()) { rightlist.add(line); } } if (line.getLowerLeftX() < wrect.getLowerLeftX() && wrect.getUpperRightX() < line.getUpperRightX()) { if (line.getUpperRightY() < wrect.getLowerLeftY()) { bottomlist.add(line); } else if (wrect.getUpperRightY() < line.getUpperRightY()) { toplist.add(line); } } } PDRectangle left = !leftlist.isEmpty() ? Collections.max(leftlist, new PDRectangleXComparator()) : null; PDRectangle right = !rightlist.isEmpty() ? Collections.min(rightlist, new PDRectangleXComparator()) : null; PDRectangle top = !toplist.isEmpty() ? Collections.min(toplist, new PDRectangleYComparator()) : null; PDRectangle bottom = !bottomlist.isEmpty() ? Collections.max(bottomlist, new PDRectangleYComparator()) : null; if (left != null && right != null && top != null && bottom != null) { PDRectangle r = new PDRectangle(left.getLowerLeftX(), bottom.getLowerLeftY(), right.getUpperRightX() - left.getLowerLeftX(), top.getUpperRightY() - bottom.getLowerLeftY()); area.add(new PDFieldSearchRectangle(PDFieldAreaSearch.RECTANGLE, r)); } return area; }
From source file:com.infoimage.infotrac.pdfbox.PDFTextAnnotator.java
License:Apache License
private float[] computeQuads(PDRectangle rect) { float[] quads = new float[8]; // top left/*from w w w. j a va2s . c o m*/ quads[0] = rect.getLowerLeftX(); // x1 quads[1] = rect.getUpperRightY() - 2; // y1 // bottom left quads[2] = rect.getUpperRightX(); // x2 quads[3] = quads[1]; // y2 // top right quads[4] = quads[0]; // x3 quads[5] = rect.getLowerLeftY() - 2; // y3 // bottom right quads[6] = quads[2]; // x4 quads[7] = quads[5]; // y4 return quads; }
From source file:com.jaromin.alfresco.repo.content.transform.CorelDrawContentTransformer.java
License:Apache License
/** * /* w ww.java 2 s . c om*/ * @param pageImages * @param out * @throws IOException * @throws FileNotFoundException * @throws COSVisitorException */ private void buildPdfFromImages(Map<File, Dimension> pageImages, OutputStream out) throws IOException, FileNotFoundException, COSVisitorException { PDDocument doc = new PDDocument(); for (Map.Entry<File, Dimension> entry : pageImages.entrySet()) { File pFile = entry.getKey(); Dimension d = entry.getValue(); PDRectangle size = new PDRectangle(d.width, d.height); PDPage page = new PDPage(size); doc.addPage(page); PDXObjectImage ximage = new PDJpeg(doc, new FileInputStream(pFile)); PDPageContentStream contentStream = new PDPageContentStream(doc, page); contentStream.drawImage(ximage, size.getLowerLeftX(), size.getLowerLeftY()); contentStream.close(); } doc.save(out); }
From source file:com.repeatability.pdf.PDFTextStripper.java
License:Apache License
private void fillBeadRectangles(PDPage page) { beadRectangles = new ArrayList<PDRectangle>(); for (PDThreadBead bead : page.getThreadBeads()) { if (bead == null) { // can't skip, because of null entry handling in processTextPosition() beadRectangles.add(null);// ww w . j a v a2s .co m continue; } PDRectangle rect = bead.getRectangle(); // bead rectangle is in PDF coordinates (y=0 is bottom), // glyphs are in image coordinates (y=0 is top), // so we must flip PDRectangle mediaBox = page.getMediaBox(); float upperRightY = mediaBox.getUpperRightY() - rect.getLowerLeftY(); float lowerLeftY = mediaBox.getUpperRightY() - rect.getUpperRightY(); rect.setLowerLeftY(lowerLeftY); rect.setUpperRightY(upperRightY); // adjust for cropbox PDRectangle cropBox = page.getCropBox(); if (cropBox.getLowerLeftX() != 0 || cropBox.getLowerLeftY() != 0) { rect.setLowerLeftX(rect.getLowerLeftX() - cropBox.getLowerLeftX()); rect.setLowerLeftY(rect.getLowerLeftY() - cropBox.getLowerLeftY()); rect.setUpperRightX(rect.getUpperRightX() - cropBox.getLowerLeftX()); rect.setUpperRightY(rect.getUpperRightY() - cropBox.getLowerLeftY()); } beadRectangles.add(rect); } }
From source file:com.repeatability.pdf.PDFTextStripper.java
License:Apache License
/** * This will process a TextPosition object and add the text to the list of characters on a page. It takes care of * overlapping text.// ww w. ja v a 2s. co m * * @param text The text to process. */ @Override protected void processTextPosition(TextPosition text) { boolean showCharacter = true; if (suppressDuplicateOverlappingText) { showCharacter = false; String textCharacter = text.getUnicode(); float textX = text.getX(); float textY = text.getY(); TreeMap<Float, TreeSet<Float>> sameTextCharacters = characterListMapping.get(textCharacter); if (sameTextCharacters == null) { sameTextCharacters = new TreeMap<Float, TreeSet<Float>>(); characterListMapping.put(textCharacter, sameTextCharacters); } // RDD - Here we compute the value that represents the end of the rendered // text. This value is used to determine whether subsequent text rendered // on the same line overwrites the current text. // // We subtract any positive padding to handle cases where extreme amounts // of padding are applied, then backed off (not sure why this is done, but there // are cases where the padding is on the order of 10x the character width, and // the TJ just backs up to compensate after each character). Also, we subtract // an amount to allow for kerning (a percentage of the width of the last // character). 
boolean suppressCharacter = false; float tolerance = text.getWidth() / textCharacter.length() / 3.0f; SortedMap<Float, TreeSet<Float>> xMatches = sameTextCharacters.subMap(textX - tolerance, textX + tolerance); for (TreeSet<Float> xMatch : xMatches.values()) { SortedSet<Float> yMatches = xMatch.subSet(textY - tolerance, textY + tolerance); if (!yMatches.isEmpty()) { suppressCharacter = true; break; } } if (!suppressCharacter) { TreeSet<Float> ySet = sameTextCharacters.get(textX); if (ySet == null) { ySet = new TreeSet<Float>(); sameTextCharacters.put(textX, ySet); } ySet.add(textY); showCharacter = true; } } if (showCharacter) { // if we are showing the character then we need to determine which article it belongs to int foundArticleDivisionIndex = -1; int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1; int notFoundButFirstLeftArticleDivisionIndex = -1; int notFoundButFirstAboveArticleDivisionIndex = -1; float x = text.getX(); float y = text.getY(); if (shouldSeparateByBeads) { for (int i = 0; i < beadRectangles.size() && foundArticleDivisionIndex == -1; i++) { PDRectangle rect = beadRectangles.get(i); if (rect != null) { if (rect.contains(x, y)) { foundArticleDivisionIndex = i * 2 + 1; } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY()) && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) { notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2; } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) { notFoundButFirstLeftArticleDivisionIndex = i * 2; } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) { notFoundButFirstAboveArticleDivisionIndex = i * 2; } } else { foundArticleDivisionIndex = 0; } } } else { foundArticleDivisionIndex = 0; } int articleDivisionIndex; if (foundArticleDivisionIndex != -1) { articleDivisionIndex = foundArticleDivisionIndex; } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) { articleDivisionIndex = 
notFoundButFirstLeftAndAboveArticleDivisionIndex; } else if (notFoundButFirstLeftArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex; } else if (notFoundButFirstAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex; } else { articleDivisionIndex = charactersByArticle.size() - 1; } List<TextPosition> textList = charactersByArticle.get(articleDivisionIndex); // In the wild, some PDF encoded documents put diacritics (accents on // top of characters) into a separate Tj element. When displaying them // graphically, the two chunks get overlayed. With text output though, // we need to do the overlay. This code recombines the diacritic with // its associated character if the two are consecutive. if (textList.isEmpty()) { textList.add(text); } else { // test if we overlap the previous entry. // Note that we are making an assumption that we need to only look back // one TextPosition to find what we are overlapping. // This may not always be true. */ TextPosition previousTextPosition = textList.get(textList.size() - 1); if (text.isDiacritic() && previousTextPosition.contains(text)) { previousTextPosition.mergeDiacritic(text); } // If the previous TextPosition was the diacritic, merge it into this // one and remove it from the list. else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) { text.mergeDiacritic(previousTextPosition); textList.remove(textList.size() - 1); textList.add(text); } else { textList.add(text); } } } }
From source file:com.vns.pdf.impl.PdfDocument.java
License:Apache License
private List<Annotation> parseAnnotation(PDPage pdPage) throws IOException { List<Annotation> annotations = new ArrayList<>(); for (PDAnnotation annt : pdPage.getAnnotations()) { if (annt instanceof PDAnnotationLink) { PDAnnotationLink link = (PDAnnotationLink) annt; PDRectangle rect = link.getRectangle(); float x = rect.getLowerLeftX(); float y = rect.getUpperRightY(); float width = rect.getWidth(); float height = rect.getHeight(); int rotation = pdPage.getRotation(); if (rotation == 0) { PDRectangle pageSize = pdPage.getMediaBox(); y = pageSize.getHeight() - y; } else if (rotation == 90) { //do nothing }// ww w . ja v a 2 s . c o m ActionData actionData = parsePDAction(link.getAction()); if (actionData == null) { actionData = parsePDDestination(link.getDestination()); } if (actionData != null) { Annotation a = new Annotation(x, y, width, height, actionData.destX, actionData.destY, actionData.destPage, actionData.destZoom); annotations.add(a); } } } return annotations; }
From source file:com.yiyihealth.tools.test.DrawPrintTextLocations.java
License:Apache License
private void stripPage(int page) throws IOException { PDFRenderer pdfRenderer = new PDFRenderer(document); image = pdfRenderer.renderImage(page, SCALE); PDPage pdPage = document.getPage(page); PDRectangle cropBox = pdPage.getCropBox(); // flip y-axis flipAT = new AffineTransform(); flipAT.translate(0, pdPage.getBBox().getHeight()); flipAT.scale(1, -1);/*from www . j a v a 2 s.c om*/ // page may be rotated rotateAT = new AffineTransform(); int rotation = pdPage.getRotation(); if (rotation != 0) { PDRectangle mediaBox = pdPage.getMediaBox(); switch (rotation) { case 90: rotateAT.translate(mediaBox.getHeight(), 0); break; case 270: rotateAT.translate(0, mediaBox.getWidth()); break; case 180: rotateAT.translate(mediaBox.getWidth(), mediaBox.getHeight()); break; default: break; } rotateAT.rotate(Math.toRadians(rotation)); } g2d = image.createGraphics(); g2d.setStroke(new BasicStroke(0.1f)); g2d.scale(SCALE, SCALE); setStartPage(page + 1); setEndPage(page + 1); Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream()); writeText(document, dummy); // beads in green g2d.setStroke(new BasicStroke(0.4f)); List<PDThreadBead> pageArticles = pdPage.getThreadBeads(); for (PDThreadBead bead : pageArticles) { PDRectangle r = bead.getRectangle(); GeneralPath p = r .transform(Matrix.getTranslateInstance(-cropBox.getLowerLeftX(), cropBox.getLowerLeftY())); Shape s = flipAT.createTransformedShape(p); s = rotateAT.createTransformedShape(s); g2d.setColor(Color.green); g2d.draw(s); } g2d.dispose(); String imageFilename = filename; int pt = imageFilename.lastIndexOf('.'); imageFilename = imageFilename.substring(0, pt) + "-marked-" + (page + 1) + ".png"; ImageIO.write(image, "png", new File(imageFilename)); }
From source file:com.zilbo.flamingSailor.TE.PDFParser.java
License:Apache License
@Override protected void endPage(PDPage page) throws IOException { super.endPage(page); int pieceID = 0; Map<String, Map<Integer, Long>> fontCounts = new HashMap<>(); List<TextPiece> wordsOfThisPage = new ArrayList<>(); for (List<TextPosition> aCharactersByArticle : charactersByArticle) { // int len = aCharactersByArticle.size(); for (TextPosition t : aCharactersByArticle) { // copy information TextPiece w = new TextPiece(pieceID++); PDFont font = t.getFont();// ww w . j av a2 s.c om PDFontDescriptor fontDescriptor = font.getFontDescriptor(); // w.setFontDescriptor(fontDescriptor); if (fontDescriptor == null) { w.setFontName("UNKNOWN"); } else { w.setFontName(fontDescriptor.getFontName()); } /* * 100: a simple step to fix the font size to the normal range, for those documents in unknown codes that PDFBox can not process now */ if (t.getFontSize() < 0.3 && t.getYScale() <= 1.0) { w.setFontSize(t.getFontSize() * 100); w.setHeight(Math.max(t.getYScale(), t.getFontSize()) * 100); w.setXScale(t.getXScale()); w.setYScale(t.getYScale()); } else { if (t.getYScale() < 0.3 && t.getFontSize() <= 1.0) { w.setYScale(t.getYScale() * 100); w.setXScale(t.getXScale() * 100); w.setHeight(Math.max(t.getYScale() * 100, t.getFontSize())); } else { w.setFontSize(t.getFontSize()); w.setHeight(Math.max(t.getYScale(), t.getFontSize())); w.setXScale(t.getXScale()); w.setYScale(t.getYScale()); } } Map<Integer, Long> counts = fontCounts.get(w.getFontName()); if (counts == null) { counts = new HashMap<>(); fontCounts.put(w.getFontName(), counts); } Long count = counts.get((int) Math.round(w.getHeight())); if (count == null) { count = 1L; } else { count += 1L; } counts.put((int) Math.round(w.getHeight()), count); w.setWidth(Math.abs(t.getWidth())); w.setGeom(t.getX(), t.getY(), w.getWidth(), w.getHeight()); w.setText(t.getCharacter()); w.setWidthOfSpace(t.getWidthOfSpace()); wordsOfThisPage.add(w); } } currentPage.processPage(wordsOfThisPage, fontCounts); 
currentPage.setText(outString.getBuffer().toString()); outString.getBuffer().setLength(0); List<PDAnnotation> annotations = page.getAnnotations(); for (PDAnnotation annotation : annotations) { if (annotation instanceof PDAnnotationLink) { PDAnnotationLink l = (PDAnnotationLink) annotation; PDRectangle rect = l.getRectangle(); PDDestination dest = l.getDestination(); if (dest instanceof PDPageXYZDestination) { PDPageXYZDestination xyzDestination = (PDPageXYZDestination) dest; PDPage pageDest = ((PDPageXYZDestination) dest).getPage(); if (rect != null) { if (xyzDestination.getPageNumber() < 0) { int pageNumber = allpages.indexOf(pageDest) + 1; Rectangle2D hotbox = new Rectangle2D.Double(rect.getLowerLeftX(), rect.getLowerLeftY(), (rect.getUpperRightX() - rect.getLowerLeftX()), (rect.getUpperRightY() - rect.getLowerLeftY())); Point2D toPoint = new Point2D.Double(xyzDestination.getLeft(), xyzDestination.getTop()); currentPage.addLink(new PDLink(hotbox, pageNumber, toPoint)); } } } } } /* The following code is REALLY raw. initial testing seemed to show memory leaks, and was REALLY slow. PDResources r = page.getResources(); Map<String, PDXObjectImage> images = r.getImages(); for (Map.Entry<String, PDXObjectImage> e : images.entrySet()) { BufferedImage bi = null; try { // currentPage.addImage(bi); // (e.getValue()).write2file("/tmp/II" + e.getKey()); if (e.getValue() instanceof PDJpeg) { PDJpeg jpg = (PDJpeg) e.getValue(); bi = jpg.getRGBImage(); ColorSpace cs = bi.getColorModel().getColorSpace(); File jpgFile = new File("/tmp/II" + e.getKey() + ".jpg"); if (cs instanceof ColorSpaceCMYK) { logger.info("Ignoring image with CMYK color space"); } else { // ImageIO.write(bi, "jpg", jpgFile); jpg.write2file("/tmp/II"+ e.getKey()); } } else { (e.getValue()).write2file("/tmp/II" + e.getKey()); } } catch (Exception ee) { logger.info("can't read image ;-(", ee); } } */ textPageList.add(currentPage); currentPage = null; }