Usage examples for org.apache.pdfbox.pdmodel.common.PDRectangle.getLowerLeftX()
public float getLowerLeftX()
From source file:de.berber.kindle.annotator.lib.Comment.java
License:Apache License
@Override protected PDAnnotation toPDAnnotation(final @Nonnull PDDocumentOutline documentOutline, final @Nonnull PDPage page) { LOG.info("Creating annotation " + xPositionFactor + "/" + yPositionFactor + " -> " + text); // Create annotation text with background color final PDGamma pdColor = getColor(); final PDAnnotationText textAnnotation = new PDAnnotationText(); textAnnotation.setContents(getText()); textAnnotation.setColour(pdColor);//from w w w. j a v a 2s . c om // set the text position final PDRectangle cropBox = page.getTrimBox(); final PDRectangle position = new PDRectangle(); position.setLowerLeftX((float) (cropBox.getLowerLeftX() + xPositionFactor * (cropBox.getUpperRightX() - cropBox.getLowerLeftX()))); position.setUpperRightX((float) (cropBox.getLowerLeftX() + xPositionFactor * (cropBox.getUpperRightX() - cropBox.getLowerLeftX()))); position.setUpperRightY((float) (cropBox.getUpperRightY() - yPositionFactor * (cropBox.getUpperRightY() - cropBox.getLowerLeftY()))); position.setLowerLeftY((float) (cropBox.getUpperRightY() - yPositionFactor * (cropBox.getUpperRightY() - cropBox.getLowerLeftY()))); textAnnotation.setRectangle(position); return textAnnotation; }
From source file:de.berber.kindle.annotator.lib.Marking.java
License:Apache License
@Override protected PDAnnotation toPDAnnotation(final PDDocumentOutline documentOutline, final PDPage page) { LOG.info("Creating marking " + leftXPositionFactor + "/" + lowerYPositionFactor + " -> " + rightXPositionFactor + "/" + upperYPositionFactor); // create highlighted area final PDGamma pdColor = getColor(); // final PDFont font = PDType1Font.HELVETICA_BOLD; // float textHeight = font.getFontHeight("Hg".getBytes(), 0, 2); final PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup( PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); txtMark.setColour(pdColor);/*w w w . ja v a 2 s . c om*/ txtMark.setConstantOpacity(opacity); if (comment != null) { // set comment if available txtMark.setContents(comment.getText()); } // Set the rectangle containing the markup final PDRectangle cropBox = page.getTrimBox(); final PDRectangle position = new PDRectangle(); position.setLowerLeftX((float) (cropBox.getLowerLeftX() + leftXPositionFactor * (cropBox.getUpperRightX() - cropBox.getLowerLeftX()))); position.setUpperRightX((float) (cropBox.getLowerLeftX() + rightXPositionFactor * (cropBox.getUpperRightX() - cropBox.getLowerLeftX()))); position.setLowerLeftY((float) (cropBox.getUpperRightY() - (lowerYPositionFactor + ((upperYPositionFactor - lowerYPositionFactor == 0.0) ? 0.025 : 0.00)) * (cropBox.getUpperRightY() - cropBox.getLowerLeftY()))); position.setUpperRightY((float) (cropBox.getUpperRightY() - (upperYPositionFactor) * (cropBox.getUpperRightY() - cropBox.getLowerLeftY()))); txtMark.setRectangle(position); // work out the points forming the four corners of the annotations // set out in anti clockwise form (Completely wraps the text) // OK, the below doesn't match that description. // It's what acrobat 7 does and displays properly! 
float[] quads = new float[8]; quads[0] = position.getLowerLeftX(); // x1 quads[1] = position.getUpperRightY(); // y1 quads[2] = position.getUpperRightX(); // x2 quads[3] = position.getUpperRightY(); // y2 quads[4] = position.getLowerLeftX(); // x3 quads[5] = position.getLowerLeftY(); // y3 quads[6] = position.getUpperRightX(); // x4 quads[7] = position.getLowerLeftY(); // y5 txtMark.setQuadPoints(quads); return txtMark; }
From source file:de.tudarmstadt.ukp.dkpro.core.io.pdf.PdfLayoutEventStripper.java
License:Apache License
/** * This will show add a character to the list of characters to be printed to the text file. * //from w w w. j a v a 2 s. c o m * @param text * The description of the character to display. */ @Override protected void processTextPosition(final TextPosition text) { boolean showCharacter = true; if (suppressDuplicateOverlappingText) { showCharacter = false; final String textCharacter = text.getCharacter(); final float textX = text.getX(); final float textY = text.getY(); List<TextPosition> sameTextCharacters = characterListMapping.get(textCharacter); if (sameTextCharacters == null) { sameTextCharacters = new ArrayList<TextPosition>(); characterListMapping.put(textCharacter, sameTextCharacters); } // RDD - Here we compute the value that represents the end of the // rendered // text. This value is used to determine whether subsequent text // rendered // on the same line overwrites the current text. // // We subtract any positive padding to handle cases where extreme // amounts // of padding are applied, then backed off (not sure why this is // done, but there // are cases where the padding is on the order of 10x the character // width, and // the TJ just backs up to compensate after each character). Also, // we subtract // an amount to allow for kerning (a percentage of the width of the // last // character). 
// boolean suppressCharacter = false; final float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f; for (int i = 0; i < sameTextCharacters.size() && textCharacter != null; i++) { final TextPosition character = sameTextCharacters.get(i); final String charCharacter = character.getCharacter(); final float charX = character.getX(); final float charY = character.getY(); // only want to suppress if (charCharacter != null && // charCharacter.equals( textCharacter ) && within(charX, textX, tolerance) && within(charY, textY, tolerance)) { suppressCharacter = true; } } if (!suppressCharacter && (text.getCharacter() != null) && (text.getCharacter().length() > 0)) { sameTextCharacters.add(text); showCharacter = true; } } if (showCharacter) { // if we are showing the character then we need to determine which // article it belongs to. int foundArticleDivisionIndex = -1; int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1; int notFoundButFirstLeftArticleDivisionIndex = -1; int notFoundButFirstAboveArticleDivisionIndex = -1; final float x = text.getX(); final float y = text.getY(); if (shouldSeparateByBeads) { for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) { final PDThreadBead bead = pageArticles.get(i); if (bead != null) { final PDRectangle rect = bead.getRectangle(); if (rect.contains(x, y)) { foundArticleDivisionIndex = i * 2 + 1; } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY()) && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) { notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2; } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) { notFoundButFirstLeftArticleDivisionIndex = i * 2; } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) { notFoundButFirstAboveArticleDivisionIndex = i * 2; } } else { foundArticleDivisionIndex = 0; } } } else { foundArticleDivisionIndex = 0; } int articleDivisionIndex = -1; if 
(foundArticleDivisionIndex != -1) { articleDivisionIndex = foundArticleDivisionIndex; } else if (notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex; } else if (notFoundButFirstLeftArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex; } else if (notFoundButFirstAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex; } else { articleDivisionIndex = charactersByArticle.size() - 1; } final List<TextPosition> textList = charactersByArticle.get(articleDivisionIndex); textList.add(text); } }
From source file:edu.isi.bmkeg.lapdf.extraction.LAPDFTextStripper.java
License:Apache License
/** * This will process a TextPosition object and add the * text to the list of characters on a page. It takes care of * overlapping text.//from ww w . ja va2 s . com * * @param text The text to process. */ protected void processTextPosition(TextPosition text) { boolean showCharacter = true; if (suppressDuplicateOverlappingText) { showCharacter = false; String textCharacter = text.getCharacter(); float textX = text.getX(); float textY = text.getY(); TreeMap<Float, TreeSet<Float>> sameTextCharacters = characterListMapping.get(textCharacter); if (sameTextCharacters == null) { sameTextCharacters = new TreeMap<Float, TreeSet<Float>>(); characterListMapping.put(textCharacter, sameTextCharacters); } // RDD - Here we compute the value that represents the end of the rendered // text. This value is used to determine whether subsequent text rendered // on the same line overwrites the current text. // // We subtract any positive padding to handle cases where extreme amounts // of padding are applied, then backed off (not sure why this is done, but there // are cases where the padding is on the order of 10x the character width, and // the TJ just backs up to compensate after each character). Also, we subtract // an amount to allow for kerning (a percentage of the width of the last // character). 
// boolean suppressCharacter = false; float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f; SortedMap<Float, TreeSet<Float>> xMatches = sameTextCharacters.subMap(textX - tolerance, textX + tolerance); for (TreeSet<Float> xMatch : xMatches.values()) { SortedSet<Float> yMatches = xMatch.subSet(textY - tolerance, textY + tolerance); if (!yMatches.isEmpty()) { suppressCharacter = true; break; } } if (!suppressCharacter) { TreeSet<Float> ySet = sameTextCharacters.get(textX); if (ySet == null) { ySet = new TreeSet<Float>(); sameTextCharacters.put(textX, ySet); } ySet.add(textY); showCharacter = true; } } if (showCharacter) { //if we are showing the character then we need to determine which //article it belongs to. int foundArticleDivisionIndex = -1; int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1; int notFoundButFirstLeftArticleDivisionIndex = -1; int notFoundButFirstAboveArticleDivisionIndex = -1; float x = text.getX(); float y = text.getY(); if (shouldSeparateByBeads) { for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) { PDThreadBead bead = (PDThreadBead) pageArticles.get(i); if (bead != null) { PDRectangle rect = bead.getRectangle(); if (rect.contains(x, y)) { foundArticleDivisionIndex = i * 2 + 1; } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY()) && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) { notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2; } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) { notFoundButFirstLeftArticleDivisionIndex = i * 2; } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) { notFoundButFirstAboveArticleDivisionIndex = i * 2; } } else { foundArticleDivisionIndex = 0; } } } else { foundArticleDivisionIndex = 0; } int articleDivisionIndex = -1; if (foundArticleDivisionIndex != -1) { articleDivisionIndex = foundArticleDivisionIndex; } else if 
(notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex; } else if (notFoundButFirstLeftArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex; } else if (notFoundButFirstAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex; } else { articleDivisionIndex = charactersByArticle.size() - 1; } List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get(articleDivisionIndex); /* In the wild, some PDF encoded documents put diacritics (accents on * top of characters) into a separate Tj element. When displaying them * graphically, the two chunks get overlayed. With text output though, * we need to do the overlay. This code recombines the diacritic with * its associated character if the two are consecutive. */ if (textList.isEmpty()) { textList.add(text); } else { /* test if we overlap the previous entry. * Note that we are making an assumption that we need to only look back * one TextPosition to find what we are overlapping. * This may not always be true. */ TextPosition previousTextPosition = (TextPosition) textList.get(textList.size() - 1); if (text.isDiacritic() && previousTextPosition.contains(text)) { previousTextPosition.mergeDiacritic(text, normalize); } /* If the previous TextPosition was the diacritic, merge it into this * one and remove it from the list. */ else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) { text.mergeDiacritic(previousTextPosition, normalize); textList.remove(textList.size() - 1); textList.add(text); } else { textList.add(text); } } } }
From source file:helper.pdfpreprocessing.pdf.TextHighlight.java
License:Apache License
/**
 * Marks every bounding box of a match on the page.
 *
 * Depending on the arguments, each box is
 * <ul>
 * <li>filled with {@code color} (when {@code comment} is empty),</li>
 * <li>labelled with the match string (when {@code withId} is set),</li>
 * <li>covered by a new highlight annotation carrying {@code comment}
 *     (non-empty comment, {@code commentOnly} false), or</li>
 * <li>used to extend the contents of an existing annotation whose
 *     parenthesized id equals the match string (non-empty comment,
 *     {@code commentOnly} true).</li>
 * </ul>
 *
 * @param color fill color and highlight color
 * @param contentStream stream used for the filled rectangle and the id text
 * @param markingMatch the match; its positions define the boxes, its string is the id
 * @param height height of the filled rectangle
 * @param withId whether to print the match string next to each box
 * @param page page receiving (or holding) the annotations
 * @param comment annotation text; empty means "fill only"
 * @param commentOnly whether to extend existing annotations instead of adding new ones
 * @return {@code true} if the match had at least one bounding box, {@code false} otherwise
 * @throws IOException if writing to the content stream or reading the annotations fails
 */
private boolean markupMatch(Color color, PDPageContentStream contentStream, Match markingMatch, int height,
        boolean withId, PDPage page, String comment, boolean commentOnly) throws IOException {
    final List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(markingMatch.positions);
    if (textBoundingBoxes.isEmpty()) {
        return false;
    }

    contentStream.setNonStrokingColor(color);
    for (PDRectangle textBoundingBox : textBoundingBoxes) {
        if (comment.isEmpty()) {
            // Plain colored box, at least 10 units wide.
            contentStream.addRect(textBoundingBox.getLowerLeftX(), textBoundingBox.getLowerLeftY(),
                    Math.max(Math.abs(textBoundingBox.getUpperRightX() - textBoundingBox.getLowerLeftX()), 10),
                    height);
            contentStream.fill();
        }

        if (withId) {
            PDFont font = PDType1Font.HELVETICA;
            contentStream.beginText();
            contentStream.setFont(font, 5);
            contentStream.newLineAtOffset(textBoundingBox.getUpperRightX(), textBoundingBox.getUpperRightY());
            contentStream.showText(markingMatch.str);
            contentStream.endText();
        }

        if (!comment.isEmpty() && !commentOnly) {
            PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(
                    PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);

            // Annotation rectangle: same origin as the box, at least 10
            // units wide and a fixed 10 units high.
            PDRectangle position = new PDRectangle();
            position.setLowerLeftX(textBoundingBox.getLowerLeftX());
            position.setLowerLeftY(textBoundingBox.getLowerLeftY());
            position.setUpperRightX(textBoundingBox.getLowerLeftX() + Math
                    .max(Math.abs(textBoundingBox.getUpperRightX() - textBoundingBox.getLowerLeftX()), 10));
            position.setUpperRightY(textBoundingBox.getLowerLeftY() + 10);
            txtMark.setRectangle(position);

            // Quad points, shifted down by 2 units relative to the rectangle.
            float[] quads = new float[8];
            quads[0] = position.getLowerLeftX(); // x1
            quads[1] = position.getUpperRightY() - 2; // y1
            quads[2] = position.getUpperRightX(); // x2
            quads[3] = quads[1]; // y2
            quads[4] = quads[0]; // x3
            quads[5] = position.getLowerLeftY() - 2; // y3
            quads[6] = quads[2]; // x4
            quads[7] = quads[5]; // y4
            txtMark.setQuadPoints(quads);

            txtMark.setConstantOpacity((float) 0.5);
            txtMark.setContents("Missing Assumption/s (" + markingMatch.str + "):\n" + comment);
            float[] colorArray = new float[] { 0, 0, 0 };
            colorArray = color.getColorComponents(colorArray);
            PDColor hColor = new PDColor(colorArray, PDDeviceRGB.INSTANCE);
            txtMark.setColor(hColor);
            txtMark.setCreationDate(Calendar.getInstance());
            txtMark.setTitlePopup("Assumption Error");
            page.getAnnotations().add(txtMark);
        } else if (!comment.isEmpty() && commentOnly) {
            for (int i = 0; i < page.getAnnotations().size(); i++) {
                String extractedComment = page.getAnnotations().get(i).getContents();
                if (extractedComment == null) {
                    continue;
                }
                // The id is the text between the first "(" and the first ")".
                // Annotations without such a pair (e.g. created by other
                // tools) are skipped instead of failing in substring().
                int idStart = extractedComment.indexOf("(") + 1;
                int idEnd = extractedComment.indexOf(")");
                if (idEnd < idStart) {
                    continue;
                }
                String commentID = extractedComment.substring(idStart, idEnd);
                // NOTE(review): the comment is appended only when it is ALREADY
                // contained in the annotation; this may be an inverted check -
                // confirm the intended behavior before changing it.
                if (markingMatch.str.equals(commentID) && extractedComment.contains(comment)) {
                    page.getAnnotations().get(i).setContents(extractedComment + "\n" + comment);
                }
            }
        }
    }
    return true;
}
From source file:hightlighting.PDFTextAnnotator.java
License:Apache License
private float[] computeQuads(PDRectangle rect) { float[] quads = new float[8]; // top left/*from w w w . j av a 2 s .co m*/ quads[0] = rect.getLowerLeftX(); // x1 quads[1] = rect.getUpperRightY(); // y1 // bottom left quads[2] = quads[0]; // x2 quads[3] = rect.getLowerLeftY(); // y2 // top right quads[4] = rect.getUpperRightX(); // x3 quads[5] = quads[1]; // y3 // bottom right quads[6] = quads[4]; // x4 quads[7] = quads[3]; // y5 return quads; }
From source file:javaexample.RadialTextPdf.java
License:Open Source License
private void generatePage(PDDocument document) throws IOException { // Creates a new page. PDPage page = new PDPage(pageRect); document.addPage(page);//from w w w. jav a 2 s . com // Gets boundings of the page. PDRectangle rect = page.getMediaBox(); // Calculates the side of the square that fits into the page. float squareSide = Math.min(rect.getWidth(), rect.getHeight()); // Calculates the center point of the page. float centerX = (rect.getLowerLeftX() + rect.getUpperRightX()) / 2; float centerY = (rect.getLowerLeftY() + rect.getUpperRightY()) / 2; PDPageContentStream cos = new PDPageContentStream(document, page); // Creates the font for the radial text. PDFont font = PDType1Font.HELVETICA_BOLD; // Standard font float fontSize = squareSide / 30; float fontAscent = font.getFontDescriptor().getAscent() / 1000 * fontSize; // Calculates key values for the drawings. float textX = squareSide / 3.4F; // x of the text. float textY = -fontAscent / 2; // y of the text (for vertical centering of text). float lineToX = textX * 0.97F; // x destination for the line. float lineWidth = squareSide / 900; // width of lines. // Moves the origin (0,0) of the axes to the center of the page. cos.concatenate2CTM(AffineTransform.getTranslateInstance(centerX, centerY)); for (float degrees = 0; degrees < 360; degrees += 7.5) { double radians = degrees2Radians(degrees); // Creates a pure color with the hue based on the angle. Color textColor = Color.getHSBColor(degrees / 360.0F, 1, 1); // Saves the graphics state because the angle changes on each iteration. cos.saveGraphicsState(); // Rotates the axes by the angle expressed in radians. cos.concatenate2CTM(AffineTransform.getRotateInstance(radians)); // Draws a line from the center of the page. cos.setLineWidth(lineWidth); cos.moveTo(0, 0); cos.lineTo(lineToX, 0); cos.stroke(); // Draws the radial text. 
cos.beginText(); cos.setNonStrokingColor(textColor); cos.setFont(font, fontSize); cos.moveTextPositionByAmount(textX, textY); cos.drawString("PDF"); cos.endText(); // Restores the graphics state to remove rotation transformation. cos.restoreGraphicsState(); } cos.close(); }
From source file:net.bookinaction.ExtractAnnotations.java
License:Apache License
public void doJob(String job, Float[] pA) throws IOException { PDDocument document = null;//from w ww .ja va 2 s . c om Stamper s = new Stamper(); // utility class final String job_file = job + ".pdf"; final String dic_file = job + "-dict.txt"; final String new_job = job + "-new.pdf"; PrintWriter writer = new PrintWriter(dic_file); ImageLocationListener imageLocationsListener = new ImageLocationListener(); AnnotationMaker annotMaker = new AnnotationMaker(); try { document = PDDocument.load(new File(job_file)); int pageNum = 0; for (PDPage page : document.getPages()) { pageNum++; PDRectangle cropBox = page.getCropBox(); List<PDAnnotation> annotations = page.getAnnotations(); // extract image locations List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>(); imageLocationsListener.setImageRects(imageRects); imageLocationsListener.processPage(page); int im = 0; for (Rectangle2D pdImageRect : imageRects) { s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect); annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect, "[im" + im + "]")); im++; } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); int j = 0; List<PDAnnotation> viableAnnots = new ArrayList(); for (PDAnnotation annot : annotations) { if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) { stripper.addRegion(Integer.toString(j++), s.getAwtRect( s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox)); viableAnnots.add(annot); } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) { viableAnnots.add(annot); } } stripper.extractRegions(page); List<PDRectangle> rects = new ArrayList<PDRectangle>(); List<String> comments = new ArrayList<String>(); List<String> highlightTexts = new ArrayList<String>(); j = 0; for (PDAnnotation viableAnnot : viableAnnots) { if (viableAnnot instanceof PDAnnotationTextMarkup) { String highlightText = 
stripper.getTextForRegion(Integer.toString(j++)); String withoutCR = highlightText.replace((char) 0x0A, '^'); String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]); rects.add(aRect); comments.add(comment); highlightTexts.add(highlightText); s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString); } else if (viableAnnot instanceof PDAnnotationText) { String comment = viableAnnot.getContents(); String colorString = String.format("%06x", viableAnnot.getColor().toRGB()); for (Rectangle2D pdImageRect : imageRects) { if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(), viableAnnot.getRectangle().getLowerLeftY())) { s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect, colorString); annotations.add(annotMaker.squareAnnotation(Color.GREEN, (Rectangle2D.Float) pdImageRect, comment)); } ; } } } PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true); int i = 0; for (PDRectangle pdRect : rects) { String comment = comments.get(i); String highlightText = highlightTexts.get(i); //annotations.add(linkAnnotation(pdRect, comment, highlightText)); //annotations.add(annotationSquareCircle(pdRect, BLUE)); s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(), pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE); i++; } canvas.close(); } writer.close(); document.save(new_job); } finally { if (document != null) { document.close(); } } }
From source file:onyx.core.parser.PDFTextStripper.java
License:Apache License
/** * This will process a TextPosition object and add the * text to the list of characters on a page. It takes care of * overlapping text./* w w w .ja v a 2 s. c om*/ * * @param text The text to process. */ protected void processTextPosition(TextPosition text) { boolean showCharacter = true; if (suppressDuplicateOverlappingText) { showCharacter = false; String textCharacter = text.getCharacter(); float textX = text.getX(); float textY = text.getY(); TreeMap<Float, TreeSet<Float>> sameTextCharacters = characterListMapping.get(textCharacter); if (sameTextCharacters == null) { sameTextCharacters = new TreeMap<Float, TreeSet<Float>>(); characterListMapping.put(textCharacter, sameTextCharacters); } // RDD - Here we compute the value that represents the end of the rendered // text. This value is used to determine whether subsequent text rendered // on the same line overwrites the current text. // // We subtract any positive padding to handle cases where extreme amounts // of padding are applied, then backed off (not sure why this is done, but there // are cases where the padding is on the order of 10x the character width, and // the TJ just backs up to compensate after each character). Also, we subtract // an amount to allow for kerning (a percentage of the width of the last // character). 
// boolean suppressCharacter = false; float tolerance = (text.getWidth() / textCharacter.length()) / 3.0f; SortedMap<Float, TreeSet<Float>> xMatches = sameTextCharacters.subMap(textX - tolerance, textX + tolerance); for (TreeSet<Float> xMatch : xMatches.values()) { SortedSet<Float> yMatches = xMatch.subSet(textY - tolerance, textY + tolerance); if (!yMatches.isEmpty()) { suppressCharacter = true; break; } } if (!suppressCharacter) { TreeSet<Float> ySet = sameTextCharacters.get(textX); if (ySet == null) { ySet = new TreeSet<Float>(); sameTextCharacters.put(textX, ySet); } ySet.add(textY); showCharacter = true; } } if (showCharacter) { //if we are showing the character then we need to determine which //article it belongs to. int foundArticleDivisionIndex = -1; int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1; int notFoundButFirstLeftArticleDivisionIndex = -1; int notFoundButFirstAboveArticleDivisionIndex = -1; float x = text.getX(); float y = text.getY(); if (shouldSeparateByBeads) { for (int i = 0; i < pageArticles.size() && foundArticleDivisionIndex == -1; i++) { PDThreadBead bead = (PDThreadBead) pageArticles.get(i); if (bead != null) { PDRectangle rect = bead.getRectangle(); if (rect.contains(x, y)) { foundArticleDivisionIndex = i * 2 + 1; } else if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY()) && notFoundButFirstLeftAndAboveArticleDivisionIndex == -1) { notFoundButFirstLeftAndAboveArticleDivisionIndex = i * 2; } else if (x < rect.getLowerLeftX() && notFoundButFirstLeftArticleDivisionIndex == -1) { notFoundButFirstLeftArticleDivisionIndex = i * 2; } else if (y < rect.getUpperRightY() && notFoundButFirstAboveArticleDivisionIndex == -1) { notFoundButFirstAboveArticleDivisionIndex = i * 2; } } else { foundArticleDivisionIndex = 0; } } } else { foundArticleDivisionIndex = 0; } int articleDivisionIndex = -1; if (foundArticleDivisionIndex != -1) { articleDivisionIndex = foundArticleDivisionIndex; } else if 
(notFoundButFirstLeftAndAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex; } else if (notFoundButFirstLeftArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex; } else if (notFoundButFirstAboveArticleDivisionIndex != -1) { articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex; } else { articleDivisionIndex = charactersByArticle.size() - 1; } List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get(articleDivisionIndex); /* In the wild, some PDF encoded documents put diacritics (accents on * top of characters) into a separate Tj element. When displaying them * graphically, the two chunks get overlayed. With text output though, * we need to do the overlay. This code recombines the diacritic with * its associated character if the two are consecutive. */ if (textList.isEmpty()) { textList.add(text); } else { /* test if we overlap the previous entry. * Note that we are making an assumption that we need to only look back * one TextPosition to find what we are overlapping. * This may not always be true. */ TextPosition previousTextPosition = (TextPosition) textList.get(textList.size() - 1); if (text.isDiacritic() && previousTextPosition.contains(text)) { previousTextPosition.mergeDiacritic(text, normalize); } /* If the previous TextPosition was the diacritic, merge it into this * one and remove it from the list. */ else if (previousTextPosition.isDiacritic() && text.contains(previousTextPosition)) { text.mergeDiacritic(previousTextPosition, normalize); textList.remove(textList.size() - 1); textList.add(text); } else { textList.add(text); } } } }
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
/**
 * Creates a stream (from FOP's PDF library) from a PDF page parsed with PDFBox.
 * <p>
 * Besides producing the returned content, this method has side effects visible in
 * its body: it handles the page's annotations, may update {@code currentMCID},
 * temporarily works on the source page's font dictionary (restored from a backup
 * afterwards), and overwrites {@code atdoc} with the transform that maps the page
 * into FOP's user space.
 *
 * @param sourceDoc the source PDF the given page to be copied belongs to
 * @param page the page to transform into a stream
 * @param key value to use as key for the stream
 * @param atdoc adjustment for stream; replaced by the computed page transform
 * @param fontinfo fonts
 * @param pos rectangle; its width and height are divided by 1000 to obtain the target size
 * @return the stream: a clip-rectangle operator for the media box followed by the
 *         rewritten page content, decoded as ISO-8859-1
 * @throws IOException if an I/O error occurs
 */
public String createStreamFromPDFBoxPage(PDDocument sourceDoc, PDPage page, String key, AffineTransform atdoc,
        FontInfo fontinfo, Rectangle pos) throws IOException {
    handleAnnotations(sourceDoc, page, atdoc);
    // If a page-number entry exists for the target page, point its first slot at the target page.
    if (pageNumbers.containsKey(targetPage.getPageIndex())) {
        pageNumbers.get(targetPage.getPageIndex()).set(0, targetPage.makeReference());
    }
    PDResources sourcePageResources = page.getResources();
    PDStream pdStream = getContents(page);

    COSDictionary fonts = (COSDictionary) sourcePageResources.getCOSObject().getDictionaryObject(COSName.FONT);
    COSDictionary fontsBackup = null;
    UniqueName uniqueName = new UniqueName(key, sourcePageResources);
    String newStream = null;
    // With font merging enabled, rewrite the content through the font-merging writer.
    // The font dictionary is copied first so it can be restored further down.
    if (fonts != null && pdfDoc.isMergeFontsEnabled()) {
        fontsBackup = new COSDictionary(fonts);
        MergeFontsPDFWriter m = new MergeFontsPDFWriter(fonts, fontinfo, uniqueName, parentFonts, currentMCID);
        newStream = m.writeText(pdStream);
        // if (newStream != null) {
        //     for (Object f : fonts.keySet().toArray()) {
        //         COSDictionary fontdata = (COSDictionary)fonts.getDictionaryObject((COSName)f);
        //         if (getUniqueFontName(fontdata) != null) {
        //             fonts.removeItem((COSName)f);
        //         }
        //     }
        // }
    }
    // Fallback: merging disabled, no fonts, or the merging writer returned null.
    if (newStream == null) {
        PDFWriter writer = new PDFWriter(uniqueName, currentMCID);
        newStream = writer.writeText(pdStream);
        currentMCID = writer.getCurrentMCID();
    }
    // Wrap the rewritten content in a fresh PDStream (ISO-8859-1 bytes).
    pdStream = new PDStream(sourceDoc, new ByteArrayInputStream(newStream.getBytes("ISO-8859-1")));
    mergeXObj(sourcePageResources.getCOSObject(), fontinfo, uniqueName);
    PDFDictionary pageResources = (PDFDictionary) cloneForNewDocument(sourcePageResources.getCOSObject());

    // Point the cloned font dictionary at the references of the merged FOP fonts,
    // assigning an object number to fonts that have no reference yet.
    PDFDictionary fontDict = (PDFDictionary) pageResources.get("Font");
    if (fontDict != null && pdfDoc.isMergeFontsEnabled()) {
        for (Map.Entry<String, Typeface> fontEntry : fontinfo.getUsedFonts().entrySet()) {
            Typeface font = fontEntry.getValue();
            if (font instanceof FOPPDFFont) {
                FOPPDFFont pdfFont = (FOPPDFFont) font;
                if (pdfFont.getRef() == null) {
                    pdfFont.setRef(new PDFDictionary());
                    pdfDoc.assignObjectNumber(pdfFont.getRef());
                }
                fontDict.put(fontEntry.getKey(), pdfFont.getRef());
            }
        }
    }
    updateXObj(sourcePageResources.getCOSObject(), pageResources);
    // Restore the source page's original font dictionary.
    if (fontsBackup != null) {
        sourcePageResources.getCOSObject().setItem(COSName.FONT, fontsBackup);
    }

    COSStream originalPageContents = pdStream.getCOSObject();

    bindOptionalContent(sourceDoc);

    PDFStream pageStream;
    Set filter;
    // if (originalPageContents instanceof COSStreamArray) {
    //     COSStreamArray array = (COSStreamArray)originalPageContents;
    //     pageStream = new PDFStream();
    //     InputStream in = array.getUnfilteredStream();
    //     OutputStream out = pageStream.getBufferOutputStream();
    //     IOUtils.copyLarge(in, out);
    //     filter = FILTER_FILTER;
    // } else {
    pageStream = (PDFStream) cloneForNewDocument(originalPageContents);
    filter = Collections.EMPTY_SET;
    // }
    if (pageStream == null) {
        pageStream = new PDFStream();
    }
    // NOTE(review): pageStream is not part of the returned value; presumably the
    // clone/transfer calls matter for their side effects - confirm.
    if (originalPageContents != null) {
        transferDict(originalPageContents, pageStream, filter);
    }

    transferPageDict(fonts, uniqueName, sourcePageResources);

    PDRectangle mediaBox = page.getMediaBox();
    PDRectangle cropBox = page.getCropBox();
    // The crop box, when present, defines the visible area; otherwise the media box does.
    PDRectangle viewBox = cropBox != null ? cropBox : mediaBox;

    //Handle the /Rotation entry on the page dict
    int rotation = PDFUtil.getNormalizedRotation(page);

    //Transform to FOP's user space
    float w = (float) pos.getWidth() / 1000f;
    float h = (float) pos.getHeight() / 1000f;
    // A quarter-turn rotation swaps the target width and height.
    if (rotation == 90 || rotation == 270) {
        float tmp = w;
        w = h;
        h = tmp;
    }
    // Scale the view box to the target size, flip the y axis, and move the view
    // box's lower-left corner to the origin.
    atdoc.setTransform(AffineTransform.getScaleInstance(w / viewBox.getWidth(), h / viewBox.getHeight()));
    atdoc.translate(0, viewBox.getHeight());
    atdoc.rotate(-Math.PI);
    atdoc.scale(-1, 1);
    atdoc.translate(-viewBox.getLowerLeftX(), -viewBox.getLowerLeftY());

    rotate(rotation, viewBox, atdoc);

    // Prefix the content with "x y w h re W n": a clipping path over the media box.
    // NOTE(review): the clip uses mediaBox while the transform uses viewBox - confirm
    // this is intended for pages whose crop box differs from the media box.
    StringBuilder boxStr = new StringBuilder();
    boxStr.append(PDFNumber.doubleOut(mediaBox.getLowerLeftX())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getLowerLeftY())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getWidth())).append(' ')
            .append(PDFNumber.doubleOut(mediaBox.getHeight())).append(" re W n\n");
    return boxStr.toString() + IOUtils.toString(pdStream.createInputStream(null), "ISO-8859-1");
}