Example usage for org.apache.pdfbox.pdmodel.font PDFont hashCode

List of usage examples for org.apache.pdfbox.pdmodel.font PDFont hashCode

Introduction

On this page you can find example usage for org.apache.pdfbox.pdmodel.font PDFont hashCode.

Prototype

@Override
    public int hashCode() 

Source Link

Usage

From source file: org.elacin.pdfextract.datasource.pdfbox.Fonts.java

License:Apache License

/**
 * Returns the {@code FontInfo} describing the given font, computing it on first use and
 * storing it in {@code fontInfoCache} so later calls for the same font return the same
 * instance.
 *
 * <p>The font name is taken from the base font (or the sub type when there is no base font),
 * stripped of any subset prefix ending in '+', and of everything from the first ','. The
 * remaining name is split on '-' / ',': the first piece becomes {@code font}, the second
 * piece (if any) becomes {@code subType}. Bold/italic are detected from the name, with a
 * special case for the LaTeX CMBX/CMTI families.
 *
 * @param pdFont the font to describe
 * @return the cached or newly created font information, never {@code null}
 */
private FontInfo getFontInfo(@NotNull PDFont pdFont) {

    /* single lookup; this method never stores a null value in the cache */
    final FontInfo cached = fontInfoCache.get(pdFont);

    if (cached != null) {
        return cached;
    }

    /* find the appropriate font name to use  - baseFont might sometimes be null */
    final String baseFont = pdFont.getBaseFont();
    String font = (baseFont != null) ? baseFont : pdFont.getSubType();

    /*
     * NOTE(review): guards against the sub type being absent as well, which previously
     * ended in a NullPointerException below - confirm whether that can actually happen
     */
    if (font == null) {
        font = "";
    }

    /*
     *  a lot of embedded fonts have names like XCSFS+Times, so remove everything before and
     * including '+'
     */
    final int plusIndex = font.indexOf('+');

    if (plusIndex != -1) {
        font = font.substring(plusIndex + 1);
    }

    final boolean mathFont = (font.length() > 4) && mathFonts.contains(font.substring(0, 4));

    /*
     * Locale.ROOT keeps the comparison independent of the JVM default locale; with e.g. a
     * Turkish locale an upper case 'I' would lower-case to a dotless i and "ITALIC" would
     * not match. Detection deliberately happens before the ',' suffix is cut off.
     */
    final String lowerCaseFont = font.toLowerCase(java.util.Locale.ROOT);
    boolean bold = lowerCaseFont.contains("bold");
    boolean italic = lowerCaseFont.contains("italic");

    /* ignore information after this */
    final int commaIndex = font.indexOf(',');

    if (commaIndex != -1) {
        font = font.substring(0, commaIndex);
    }

    /* make a distinction between type3 fonts which usually have no name */
    if ((pdFont instanceof PDType3Font) && (baseFont == null)) {
        font = "Type3[" + Integer.toHexString(pdFont.hashCode()) + "]";
    }

    /*
     * Find the plain font name, without any other information.
     * Three typical font names can be:
     * - LPPMinionUnicode-Italic
     * - LPPMyriadCondLightUnicode (as opposed to for example LPPMyriadCondUnicode and
     * LPPMyriadLightUnicode-Bold)
     * - Times-Bold (Type1)
     *
     * The name is split on '-' (and ','): the first piece is kept as the font name and the
     * second piece, if present, as the sub type.
     */
    final String[] fontParts = font.split("[-,]");
    final String subType = (fontParts.length > 1) ? fontParts[1] : "";

    /* split() yields an empty array for a name made up only of separators (e.g. "-") */
    font = (fontParts.length > 0) ? fontParts[0] : "";

    /* this is latex specific */
    if (font.contains("CMBX")) {
        font = font.replace("CMBX", "CMR");
        bold = true;
        italic = false;
    } else if (font.contains("CMTI")) {
        font = font.replace("CMTI", "CMR");
        bold = false;
        italic = true;
    }

    final FontInfo fi = new FontInfo();

    fi.font = font;
    fi.subType = subType;
    fi.bold = bold;
    fi.italic = italic;
    fi.mathFont = mathFont;
    fontInfoCache.put(pdFont, fi);

    return fi;
}