Example usage for org.apache.pdfbox.cos COSName CID_TO_GID_MAP

List of usage examples for org.apache.pdfbox.cos COSName CID_TO_GID_MAP

Introduction

In this page you can find the example usage for org.apache.pdfbox.cos COSName CID_TO_GID_MAP.

Prototype

COSName CID_TO_GID_MAP

To view the source code for org.apache.pdfbox.cos COSName CID_TO_GID_MAP.

Click Source Link

Usage

From source file:modules.PDFFontDependencyExtractorModule.java

License:Apache License

public PDFFontResults extractFontList(File f) throws IOException, InvalidParameterException {
    PDDocument document;//from w  w  w  .  j  a v a 2 s .  co  m
    try {
        document = PDDocument.load(f);
    } catch (IOException x) {
        throw new InvalidParameterException("Not a PDF file");
    }
    SortedSet<FontInformation> ret = new TreeSet<FontInformation>(new Comparator<FontInformation>() {

        @Override
        public int compare(FontInformation o1, FontInformation o2) {
            int a = o1.fontName.compareTo(o2.fontName);
            if (a != 0)
                return a;
            else
                return o1.fontType.compareTo(o2.fontType);
        }

    });

    document.getDocumentCatalog().getAllPages();
    // The code down here is easier as it gets all the fonts used in the
    // document. Still, this would inlcude unused fonts, so we get the fonts
    // page by page and add them to a Hash table.
    for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) {
        if (c == null || !(c.getObject() instanceof COSDictionary)) {
            continue;
            // System.out.println(c.getObject());
        }

        COSDictionary fontDictionary = (COSDictionary) c.getObject();
        // System.out.println(dic.getNameAsString(COSName.BASE_FONT));
        // }
        // }
        // int pagen = document.getNumberOfPages();
        // i=0;
        // for (int p=0;p<pagen;p++){
        // PDPage page = (PDPage)pages.get(p);
        // PDResources res = page.findResources();
        // //for each page resources
        // if (res==null) continue;
        // // get the font dictionary
        // COSDictionary fonts = (COSDictionary)
        // res.getCOSDictionary().getDictionaryObject( COSName.FONT );
        // for( COSName fontName : fonts.keySet() ) {
        // COSObject font = (COSObject) fonts.getItem( fontName );
        // // if the font has already been visited we ingore it
        // long objectId = font.getObjectNumber().longValue();
        // if (ret.get(objectId)!=null)
        // continue;
        // if( font==null || ! (font.getObject() instanceof COSDictionary) )
        // continue;
        // COSDictionary fontDictionary = (COSDictionary)font.getObject();

        // Type MUSt be font
        if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font")) {
            continue;
        }
        // get the variables
        FontInformation fi = new FontInformation();
        fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE);

        String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT);
        if (baseFont == null) {
            continue;
        }
        if (Arrays.binarySearch(standard14, baseFont) >= 0) {
            continue;
        }
        COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC);
        COSBase enc = fontDictionary.getItem(COSName.ENCODING);
        COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE);
        fontDictionary.getInt(COSName.FIRST_CHAR);
        fontDictionary.getInt(COSName.LAST_CHAR);
        String encoding;
        boolean toUnicode = uni != null;
        if (enc == null) {
            encoding = "standard14";
        }
        if (enc instanceof COSString) {
            encoding = ((COSString) enc).getString();
        } else {
            encoding = "table";
        }
        fi.isSubset = false;
        boolean t = true;
        // Type one and TT can have subsets defineing the basename see 5.5.3
        // pdfref 1.6
        // if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 ||
        // fi.fontType.equals(COSName.TRUE_TYPE.getName()) )
        if (baseFont != null) {
            if (baseFont.length() > 6) {
                for (int k = 0; k < 6; k++)
                    if (!Character.isUpperCase(baseFont.charAt(k))) {
                        t = false;
                    }
                if (baseFont.charAt(6) != '+') {
                    t = false;
                }
            } else {
                t = false;
            }
            fi.isSubset = t;
            if (fi.isSubset) {
                fi.baseName = baseFont.substring(0, 6);
                baseFont = baseFont.substring(7);
            }
        }
        fi.fontFlags = 0;
        if (fi.fontType.equals(COSName.TYPE0.getName()) || fi.fontType.equals(COSName.TYPE3.getName())) {
            fi.isEmbedded = true;
        }

        if (fontDescriptor != null) {
            // in Type1 charset indicates font is subsetted
            if (fontDescriptor.getItem(COSName.CHAR_SET) != null) {
                fi.isSubset = true;
            }
            if (fontDescriptor.getItem(COSName.FONT_FILE) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE3) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE2) != null) {
                fi.isEmbedded = true;
            }
            fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags"));
            fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY);
            fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH);

        }
        fi.charset = encoding;
        fi.fontName = baseFont;
        fi.isToUnicode = toUnicode;
        fi.encoding = fontDictionary.getNameAsString(COSName.CID_TO_GID_MAP);

        ret.add(fi);

    } // for all fonts

    HashMultimap<String, FontInformation> m = HashMultimap.create();

    for (FontInformation ff : ret) {
        m.put(ff.fontName, ff);
    }
    LinkedList<FontInformation> missing = new LinkedList<FontInformation>();
    Set<String> k = m.keySet();
    for (String kk : k) {
        Set<FontInformation> s = m.get(kk);
        if (s.size() < 1) {
            continue;
        }
        if (s.size() > 1) {
            boolean found = false;
            FontInformation ff = null;
            for (FontInformation fonti : s) {
                if (!fonti.isEmbedded) {
                    ff = fonti;
                } else {
                    found = true;
                }
            }
            if (!found) {
                missing.add(ff);
            }
        } else {
            FontInformation ff = s.iterator().next();
            if (!ff.isEmbedded) {
                missing.add(ff);
            }
        }

    }

    // } // for all pages
    // Iterator<FontInformation> it = ret.iterator();
    // FontInformation prev = null;
    // LinkedList<FontInformation> toDelete = new
    // LinkedList<FontInformation>();
    // while (it.hasNext()) {
    // FontInformation current = it.next();
    //
    // if (prev!= null && prev.fontName.equals(current.fontName) &&
    // (prev.fontType.startsWith("CIDFontType") ||
    // current.fontType.startsWith("CIDFontType")))
    // toDelete.add(current);
    // prev = current;
    // }
    //
    // //ret.removeAll(toDelete);
    // FontInformation[] retArray =toDelete.toArray(new FontInformation[0]);
    //

    if (missing.size() == 0) {
        missing = null;
    } else {
        System.out.println("Found missing fonts: " + f);
        System.out.println(missing);
    }
    return new PDFFontResults(new LinkedList<FontInformation>(ret), missing);
}