List of usage examples for org.apache.pdfbox.pdmodel.font PDFontDescriptor getFontName
public String getFontName()
From source file:com.zilbo.flamingSailor.TE.PDFParser.java
License:Apache License
@Override protected void endPage(PDPage page) throws IOException { super.endPage(page); int pieceID = 0; Map<String, Map<Integer, Long>> fontCounts = new HashMap<>(); List<TextPiece> wordsOfThisPage = new ArrayList<>(); for (List<TextPosition> aCharactersByArticle : charactersByArticle) { // int len = aCharactersByArticle.size(); for (TextPosition t : aCharactersByArticle) { // copy information TextPiece w = new TextPiece(pieceID++); PDFont font = t.getFont();// w ww . j a v a 2 s . c o m PDFontDescriptor fontDescriptor = font.getFontDescriptor(); // w.setFontDescriptor(fontDescriptor); if (fontDescriptor == null) { w.setFontName("UNKNOWN"); } else { w.setFontName(fontDescriptor.getFontName()); } /* * 100: a simple step to fix the font size to the normal range, for those documents in unknown codes that PDFBox can not process now */ if (t.getFontSize() < 0.3 && t.getYScale() <= 1.0) { w.setFontSize(t.getFontSize() * 100); w.setHeight(Math.max(t.getYScale(), t.getFontSize()) * 100); w.setXScale(t.getXScale()); w.setYScale(t.getYScale()); } else { if (t.getYScale() < 0.3 && t.getFontSize() <= 1.0) { w.setYScale(t.getYScale() * 100); w.setXScale(t.getXScale() * 100); w.setHeight(Math.max(t.getYScale() * 100, t.getFontSize())); } else { w.setFontSize(t.getFontSize()); w.setHeight(Math.max(t.getYScale(), t.getFontSize())); w.setXScale(t.getXScale()); w.setYScale(t.getYScale()); } } Map<Integer, Long> counts = fontCounts.get(w.getFontName()); if (counts == null) { counts = new HashMap<>(); fontCounts.put(w.getFontName(), counts); } Long count = counts.get((int) Math.round(w.getHeight())); if (count == null) { count = 1L; } else { count += 1L; } counts.put((int) Math.round(w.getHeight()), count); w.setWidth(Math.abs(t.getWidth())); w.setGeom(t.getX(), t.getY(), w.getWidth(), w.getHeight()); w.setText(t.getCharacter()); w.setWidthOfSpace(t.getWidthOfSpace()); wordsOfThisPage.add(w); } } currentPage.processPage(wordsOfThisPage, fontCounts); 
currentPage.setText(outString.getBuffer().toString()); outString.getBuffer().setLength(0); List<PDAnnotation> annotations = page.getAnnotations(); for (PDAnnotation annotation : annotations) { if (annotation instanceof PDAnnotationLink) { PDAnnotationLink l = (PDAnnotationLink) annotation; PDRectangle rect = l.getRectangle(); PDDestination dest = l.getDestination(); if (dest instanceof PDPageXYZDestination) { PDPageXYZDestination xyzDestination = (PDPageXYZDestination) dest; PDPage pageDest = ((PDPageXYZDestination) dest).getPage(); if (rect != null) { if (xyzDestination.getPageNumber() < 0) { int pageNumber = allpages.indexOf(pageDest) + 1; Rectangle2D hotbox = new Rectangle2D.Double(rect.getLowerLeftX(), rect.getLowerLeftY(), (rect.getUpperRightX() - rect.getLowerLeftX()), (rect.getUpperRightY() - rect.getLowerLeftY())); Point2D toPoint = new Point2D.Double(xyzDestination.getLeft(), xyzDestination.getTop()); currentPage.addLink(new PDLink(hotbox, pageNumber, toPoint)); } } } } } /* The following code is REALLY raw. initial testing seemed to show memory leaks, and was REALLY slow. PDResources r = page.getResources(); Map<String, PDXObjectImage> images = r.getImages(); for (Map.Entry<String, PDXObjectImage> e : images.entrySet()) { BufferedImage bi = null; try { // currentPage.addImage(bi); // (e.getValue()).write2file("/tmp/II" + e.getKey()); if (e.getValue() instanceof PDJpeg) { PDJpeg jpg = (PDJpeg) e.getValue(); bi = jpg.getRGBImage(); ColorSpace cs = bi.getColorModel().getColorSpace(); File jpgFile = new File("/tmp/II" + e.getKey() + ".jpg"); if (cs instanceof ColorSpaceCMYK) { logger.info("Ignoring image with CMYK color space"); } else { // ImageIO.write(bi, "jpg", jpgFile); jpg.write2file("/tmp/II"+ e.getKey()); } } else { (e.getValue()).write2file("/tmp/II" + e.getKey()); } } catch (Exception ee) { logger.info("can't read image ;-(", ee); } } */ textPageList.add(currentPage); currentPage = null; }
From source file:net.padaf.preflight.font.FontMetaDataValidation.java
License:Apache License
/** * Value of the dc:title must be the same as the FontName in the font * descriptor./*w ww .j av a2 s . c o m*/ * * @param metadata * XMPMetaData of the Font File Stream * @param fontDesc * The FontDescriptor dictionary * @param ve * the list of validation error to update if the validation fails * @throws ValidationException */ public boolean analyseFontName(XMPMetadata metadata, PDFontDescriptor fontDesc, List<ValidationError> ve) throws ValidationException { String fontName = fontDesc.getFontName(); String noSubSetName = fontName; if (AbstractFontValidator.isSubSet(fontName)) { noSubSetName = fontName.split(AbstractFontValidator.getSubSetPatternDelimiter())[1]; } DublinCoreSchema dc = metadata.getDublinCoreSchema(); if (dc.getTitle() != null) { String defaultTitle = dc.getTitleValue("x-default"); if (defaultTitle != null) { if (!defaultTitle.equals(fontName) && (noSubSetName != null && !defaultTitle.equals(noSubSetName))) { StringBuilder sb = new StringBuilder(80); sb.append("FontName").append( " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream."); ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb.toString())); return false; } // --- default value is the right one return true; } else { Iterator<AbstractField> it = dc.getTitle().getContainer().getAllProperties().iterator(); boolean empty = true; while (it.hasNext()) { empty = false; AbstractField tmp = it.next(); if (tmp != null && tmp instanceof TextType) { if (((TextType) tmp).getStringValue().equals(fontName) || (noSubSetName != null && ((TextType) tmp).getStringValue().equals(noSubSetName))) { // value found, return return true; } } } // title doesn't match, it is an error. 
StringBuilder sb = new StringBuilder(80); sb.append("FontName"); if (empty) { sb.append( " present in the FontDescriptor dictionary can't be found in XMP information the Font File Stream."); ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, sb.toString())); } else { sb.append( " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream."); ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb.toString())); } return false; } } // ---- dc:title is required ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, "dc:title is missing from the FontFile MetaData")); return false; }
From source file:net.timendum.pdf.StatisticParser.java
License:Open Source License
/**
 * Tells whether a font descriptor describes an italic font.
 *
 * <p>The font counts as italic when its italic angle is non-zero, when the
 * {@code FLAG_ITALIC} bit is set in its flags, or when its font name contains "Italic".
 *
 * @param descriptor the font descriptor to inspect
 * @return {@code true} if any of the three indicators is present
 */
public boolean isItalic(PDFontDescriptor descriptor) {
    return descriptor.getItalicAngle() != 0f
            || (descriptor.getFlags() & FLAG_ITALIC) == FLAG_ITALIC
            || (descriptor.getFontName() != null && descriptor.getFontName().contains("Italic"));
}
From source file:net.timendum.pdf.StatisticParser.java
License:Open Source License
public boolean isBold(PDFontDescriptor descriptor) { if (descriptor.getFontWeight() > averangeFontWeight) { return true; }// www .j ava 2 s.c o m if ((descriptor.getFlags() & FLAG_FORCE_BOLD) == FLAG_FORCE_BOLD) { return true; } if (descriptor.getFontName() != null && descriptor.getFontName().indexOf("Bold") > -1) { return true; } return false; }
From source file:org.apache.padaf.preflight.font.FontMetaDataValidation.java
License:Apache License
/**
 * Value of the dc:title must be the same as the FontName in the font descriptor.
 *
 * <p>A title matches when it equals the full font name or, for a subset font, the part of the
 * name after the subset delimiter. If the metadata has no Dublin Core schema or no dc:title,
 * nothing is checked and the method reports success.
 *
 * @param metadata
 *          XMPMetaData of the Font File Stream
 * @param fontDesc
 *          The FontDescriptor dictionary
 * @param ve
 *          the list of validation error to update if the validation fails
 * @return {@code false} if an error was added to {@code ve}, {@code true} otherwise
 * @throws ValidationException declared by the signature; not thrown directly by this method
 */
public boolean analyseFontName(XMPMetadata metadata, PDFontDescriptor fontDesc, List<ValidationError> ve)
        throws ValidationException {
    final String fontName = fontDesc.getFontName();
    final String noSubSetName = AbstractFontValidator.isSubSet(fontName)
            ? fontName.split(AbstractFontValidator.getSubSetPatternDelimiter())[1]
            : fontName;

    DublinCoreSchema dc = metadata.getDublinCoreSchema();
    if (dc == null || dc.getTitle() == null) {
        // Nothing to compare against.
        return true;
    }

    String defaultTitle = dc.getTitleValue("x-default");
    if (defaultTitle != null) {
        boolean differsFromFull = !defaultTitle.equals(fontName);
        boolean differsFromStripped = noSubSetName != null && !defaultTitle.equals(noSubSetName);
        if (differsFromFull && differsFromStripped) {
            ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH,
                    "FontName"
                            + " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream."));
            return false;
        }
        // --- default value is the right one
        return true;
    }

    // No default title: look for any text property equal to one of the two names.
    boolean sawAnyProperty = false;
    for (Iterator<AbstractField> props = dc.getTitle().getContainer().getAllProperties().iterator();
            props.hasNext();) {
        sawAnyProperty = true;
        AbstractField candidate = props.next();
        if (!(candidate instanceof TextType)) {
            continue;
        }
        TextType text = (TextType) candidate;
        if (text.getStringValue().equals(fontName)
                || (noSubSetName != null && text.getStringValue().equals(noSubSetName))) {
            // value found, return
            return true;
        }
    }

    // title doesn't match, it is an error.
    if (sawAnyProperty) {
        ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH,
                "FontName"
                        + " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream."));
    } else {
        ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING,
                "FontName"
                        + " present in the FontDescriptor dictionary can't be found in XMP information the Font File Stream."));
    }
    return false;
}
From source file:org.xmlcml.font.NonStandardFontManager.java
License:Apache License
private String getFontName(PDFont pdFont) { String fontName;//from www . j a v a2 s . c om AMIFont amiFont; PDFontDescriptor fd = AMIFont.getFontDescriptorOrDescendantFontDescriptor(pdFont); if (fd == null) { if (nullFontDescriptorReport) { LOG.error("****************** Null Font Descriptor : " + pdFont + "\n FURTHER ERRORS HIDDEN"); nullFontDescriptorReport = false; } } if (fd == null) { amiFont = this.lookupOrCreateFont(0, (COSDictionary) pdFont.getCOSObject()); fontName = amiFont.getFontName(); if (fontName == null) { throw new RuntimeException("No currentFontName"); } } else { fontName = fd.getFontName(); } return fontName; }
From source file:org.xmlcml.pdf2svg.AMIFont.java
License:Apache License
public static PDFontDescriptor getDescendantFontDescriptor(PDFont pdFont) { PDFontDescriptor fd = null; PDFont descendantFont = getFirstDescendantFont(pdFont); fd = (descendantFont == null) ? null : descendantFont.getFontDescriptor(); LOG.trace("fd (" + fd.getFontName() + ") " + fd); return fd;//from www . ja v a2 s . c o m }