Example usage for org.apache.pdfbox.pdmodel.font.PDFontDescriptor#getFontName

List of usage examples for org.apache.pdfbox.pdmodel.font.PDFontDescriptor#getFontName

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.font.PDFontDescriptor#getFontName.

Prototype

public String getFontName() 

Source Link

Document

Get the font name.

Usage

From source file:com.zilbo.flamingSailor.TE.PDFParser.java

License:Apache License

/**
 * Finishes processing of a page: converts every collected {@code TextPosition} into a
 * {@code TextPiece}, tallies how often each (font name, rounded height) pair occurs,
 * hands both to {@code currentPage}, records the page's link annotations and finally
 * appends {@code currentPage} to {@code textPageList}.
 *
 * @param page the page that has just been processed
 * @throws IOException propagated from {@code super.endPage}, {@code page.getAnnotations()}
 *                     or {@code getDestination()}
 */
@Override
protected void endPage(PDPage page) throws IOException {
    super.endPage(page);

    // font name -> (rounded piece height -> number of pieces seen)
    Map<String, Map<Integer, Long>> fontCounts = new HashMap<>();
    List<TextPiece> pieces = new ArrayList<>();
    int nextPieceId = 0;

    for (List<TextPosition> article : charactersByArticle) {
        for (TextPosition glyph : article) {
            TextPiece piece = new TextPiece(nextPieceId++);

            PDFontDescriptor descriptor = glyph.getFont().getFontDescriptor();
            piece.setFontName(descriptor == null ? "UNKNOWN" : descriptor.getFontName());

            float fontSize = glyph.getFontSize();
            float xScale = glyph.getXScale();
            float yScale = glyph.getYScale();

            // Very small values are multiplied by 100 to bring them back into the normal
            // range; this is a simple workaround for documents in encodings that PDFBox
            // cannot process yet.
            if (fontSize < 0.3 && yScale <= 1.0) {
                piece.setFontSize(fontSize * 100);
                piece.setHeight(Math.max(yScale, fontSize) * 100);
                piece.setXScale(xScale);
                piece.setYScale(yScale);
            } else if (yScale < 0.3 && fontSize <= 1.0) {
                // NOTE(review): no font size is set on this path, unlike the other two
                // branches - confirm whether that is intentional.
                piece.setYScale(yScale * 100);
                piece.setXScale(xScale * 100);
                piece.setHeight(Math.max(yScale * 100, fontSize));
            } else {
                piece.setFontSize(fontSize);
                piece.setHeight(Math.max(yScale, fontSize));
                piece.setXScale(xScale);
                piece.setYScale(yScale);
            }

            // Count this piece under its font name and rounded height.
            String fontName = piece.getFontName();
            Map<Integer, Long> heightCounts = fontCounts.get(fontName);
            if (heightCounts == null) {
                heightCounts = new HashMap<>();
                fontCounts.put(fontName, heightCounts);
            }
            int heightKey = (int) Math.round(piece.getHeight());
            Long seen = heightCounts.get(heightKey);
            heightCounts.put(heightKey, seen == null ? 1L : seen + 1L);

            piece.setWidth(Math.abs(glyph.getWidth()));
            piece.setGeom(glyph.getX(), glyph.getY(), piece.getWidth(), piece.getHeight());
            piece.setText(glyph.getCharacter());
            piece.setWidthOfSpace(glyph.getWidthOfSpace());

            pieces.add(piece);
        }
    }

    currentPage.processPage(pieces, fontCounts);
    currentPage.setText(outString.getBuffer().toString());
    // Reset the shared output buffer now that its content has been stored on the page.
    outString.getBuffer().setLength(0);

    List<PDAnnotation> annotations = page.getAnnotations();
    for (PDAnnotation annotation : annotations) {
        if (!(annotation instanceof PDAnnotationLink)) {
            continue;
        }
        PDAnnotationLink link = (PDAnnotationLink) annotation;
        PDRectangle rect = link.getRectangle();
        PDDestination dest = link.getDestination();
        if (!(dest instanceof PDPageXYZDestination)) {
            continue;
        }
        PDPageXYZDestination xyzDestination = (PDPageXYZDestination) dest;
        PDPage targetPage = xyzDestination.getPage();

        // Only links with a hot area whose destination reports a negative page number
        // are recorded; the page number is then derived from the position in allpages.
        if (rect == null || xyzDestination.getPageNumber() >= 0) {
            continue;
        }

        int pageNumber = allpages.indexOf(targetPage) + 1;
        float left = rect.getLowerLeftX();
        float bottom = rect.getLowerLeftY();
        Rectangle2D hotbox = new Rectangle2D.Double(left, bottom,
                (rect.getUpperRightX() - left),
                (rect.getUpperRightY() - bottom));
        Point2D toPoint = new Point2D.Double(xyzDestination.getLeft(), xyzDestination.getTop());
        currentPage.addLink(new PDLink(hotbox, pageNumber, toPoint));
    }

    // Image extraction (writing the page's XObject images to /tmp) was prototyped here
    // but is disabled: the code was really raw, and initial testing seemed to show
    // memory leaks and very slow processing.

    textPageList.add(currentPage);
    currentPage = null;
}

From source file:net.padaf.preflight.font.FontMetaDataValidation.java

License:Apache License

/**
 * Value of the dc:title must be the same as the FontName in the font
 * descriptor./*w ww .j av  a2  s . c o m*/
 * 
 * @param metadata
 *          XMPMetaData of the Font File Stream
 * @param fontDesc
 *          The FontDescriptor dictionary
 * @param ve
 *          the list of validation error to update if the validation fails
 * @throws ValidationException
 */
public boolean analyseFontName(XMPMetadata metadata, PDFontDescriptor fontDesc, List<ValidationError> ve)
        throws ValidationException {
    String fontName = fontDesc.getFontName();
    String noSubSetName = fontName;
    if (AbstractFontValidator.isSubSet(fontName)) {
        noSubSetName = fontName.split(AbstractFontValidator.getSubSetPatternDelimiter())[1];
    }

    DublinCoreSchema dc = metadata.getDublinCoreSchema();
    if (dc.getTitle() != null) {
        String defaultTitle = dc.getTitleValue("x-default");
        if (defaultTitle != null) {

            if (!defaultTitle.equals(fontName)
                    && (noSubSetName != null && !defaultTitle.equals(noSubSetName))) {
                StringBuilder sb = new StringBuilder(80);
                sb.append("FontName").append(
                        " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream.");
                ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb.toString()));
                return false;
            }

            // --- default value is the right one
            return true;
        } else {
            Iterator<AbstractField> it = dc.getTitle().getContainer().getAllProperties().iterator();
            boolean empty = true;
            while (it.hasNext()) {
                empty = false;
                AbstractField tmp = it.next();
                if (tmp != null && tmp instanceof TextType) {
                    if (((TextType) tmp).getStringValue().equals(fontName) || (noSubSetName != null
                            && ((TextType) tmp).getStringValue().equals(noSubSetName))) {
                        // value found, return
                        return true;
                    }
                }
            }

            // title doesn't match, it is an error.
            StringBuilder sb = new StringBuilder(80);
            sb.append("FontName");
            if (empty) {
                sb.append(
                        " present in the FontDescriptor dictionary can't be found in XMP information the Font File Stream.");
                ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, sb.toString()));
            } else {
                sb.append(
                        " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream.");
                ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb.toString()));
            }
            return false;
        }
    }

    // ---- dc:title is required
    ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING,
            "dc:title is missing from the FontFile MetaData"));
    return false;
}

From source file:net.timendum.pdf.StatisticParser.java

License:Open Source License

/**
 * Tells whether the given font descriptor describes an italic font.
 *
 * <p>A font counts as italic when its italic angle is non-zero, when the
 * {@code FLAG_ITALIC} bits are set in its flags, or when its font name contains
 * {@code "Italic"}.
 *
 * @param descriptor the font descriptor to inspect
 * @return {@code true} if any of the three checks succeeds
 */
public boolean isItalic(PDFontDescriptor descriptor) {
    if (descriptor.getItalicAngle() != 0f
            || (descriptor.getFlags() & FLAG_ITALIC) == FLAG_ITALIC) {
        return true;
    }
    String fontName = descriptor.getFontName();
    return fontName != null && fontName.contains("Italic");
}

From source file:net.timendum.pdf.StatisticParser.java

License:Open Source License

/**
 * Tells whether the given font descriptor describes a bold font.
 *
 * <p>A font counts as bold when its font weight exceeds {@code averangeFontWeight},
 * when the {@code FLAG_FORCE_BOLD} bits are set in its flags, or when its font name
 * contains {@code "Bold"}.
 *
 * @param descriptor the font descriptor to inspect
 * @return {@code true} if any of the three checks succeeds
 */
public boolean isBold(PDFontDescriptor descriptor) {
    if (descriptor.getFontWeight() > averangeFontWeight
            || (descriptor.getFlags() & FLAG_FORCE_BOLD) == FLAG_FORCE_BOLD) {
        return true;
    }
    String fontName = descriptor.getFontName();
    return fontName != null && fontName.contains("Bold");
}

From source file:org.apache.padaf.preflight.font.FontMetaDataValidation.java

License:Apache License

/**
 * Checks that the value of dc:title is the same as the FontName in the font
 * descriptor.
 *
 * <p>When the metadata has no Dublin Core schema, or the schema has no title, the
 * check passes without adding an error.
 *
 * @param metadata
 *          XMPMetaData of the Font File Stream
 * @param fontDesc
 *          The FontDescriptor dictionary
 * @param ve
 *          the list of validation error to update if the validation fails
 * @return {@code false} if an error was added to {@code ve}, {@code true} otherwise
 * @throws ValidationException
 *           declared for callers; nothing in this method throws it directly
 */
public boolean analyseFontName(XMPMetadata metadata, PDFontDescriptor fontDesc, List<ValidationError> ve)
        throws ValidationException {
    final String fontName = fontDesc.getFontName();
    // When isSubSet accepts the name, also allow the second piece of the name after
    // splitting on the subset pattern delimiter.
    final String noSubSetName = AbstractFontValidator.isSubSet(fontName)
            ? fontName.split(AbstractFontValidator.getSubSetPatternDelimiter())[1]
            : fontName;

    final DublinCoreSchema dc = metadata.getDublinCoreSchema();
    if (dc == null || dc.getTitle() == null) {
        // nothing to compare against
        return true;
    }

    final String defaultTitle = dc.getTitleValue("x-default");
    if (defaultTitle != null) {
        boolean accepted = defaultTitle.equals(fontName)
                || noSubSetName == null
                || defaultTitle.equals(noSubSetName);
        if (accepted) {
            // --- default value is the right one
            return true;
        }
        ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, "FontName"
                + " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream."));
        return false;
    }

    // No "x-default" entry: look for the font name among the text entries of the title.
    Iterator<AbstractField> properties = dc.getTitle().getContainer().getAllProperties().iterator();
    final boolean empty = !properties.hasNext();
    while (properties.hasNext()) {
        AbstractField field = properties.next();
        if (!(field instanceof TextType)) {
            continue;
        }
        String value = ((TextType) field).getStringValue();
        if (value.equals(fontName) || (noSubSetName != null && value.equals(noSubSetName))) {
            // value found, return
            return true;
        }
    }

    // title doesn't match, it is an error.
    if (empty) {
        ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, "FontName"
                + " present in the FontDescriptor dictionary can't be found in XMP information the Font File Stream."));
    } else {
        ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, "FontName"
                + " present in the FontDescriptor dictionary doesn't match with XMP information dc:title of the Font File Stream."));
    }
    return false;
}

From source file:org.xmlcml.font.NonStandardFontManager.java

License:Apache License

/**
 * Resolves the font name for a PDF font.
 *
 * <p>The name is taken from the descriptor returned by
 * {@code AMIFont.getFontDescriptorOrDescendantFontDescriptor}. When that descriptor is
 * null, the name of the font obtained from {@code lookupOrCreateFont} is used instead;
 * the null descriptor is logged only while {@code nullFontDescriptorReport} is set, and
 * the flag is cleared after the first report.
 *
 * @param pdFont the font whose name is wanted
 * @return the font name
 * @throws RuntimeException if there is no descriptor and the fallback font has no name
 */
private String getFontName(PDFont pdFont) {
    PDFontDescriptor descriptor = AMIFont.getFontDescriptorOrDescendantFontDescriptor(pdFont);
    if (descriptor != null) {
        return descriptor.getFontName();
    }

    if (nullFontDescriptorReport) {
        LOG.error("****************** Null Font Descriptor : " + pdFont + "\n       FURTHER ERRORS HIDDEN");
        nullFontDescriptorReport = false;
    }

    AMIFont fallbackFont = this.lookupOrCreateFont(0, (COSDictionary) pdFont.getCOSObject());
    String fallbackName = fallbackFont.getFontName();
    if (fallbackName == null) {
        throw new RuntimeException("No currentFontName");
    }
    return fallbackName;
}

From source file:org.xmlcml.pdf2svg.AMIFont.java

License:Apache License

/**
 * Returns the font descriptor of the first descendant font of the given font.
 *
 * @param pdFont the font whose first descendant font is looked up via
 *               {@code getFirstDescendantFont}
 * @return the descendant font's descriptor, or {@code null} when there is no
 *         descendant font (or it has no descriptor)
 */
public static PDFontDescriptor getDescendantFontDescriptor(PDFont pdFont) {
    PDFont descendantFont = getFirstDescendantFont(pdFont);
    PDFontDescriptor fd = (descendantFont == null) ? null : descendantFont.getFontDescriptor();
    // fd is legitimately null here, so the trace message must not dereference it blindly.
    String fontName = (fd == null) ? null : fd.getFontName();
    LOG.trace("fd (" + fontName + ") " + fd);
    return fd;
}