Example usage for org.apache.pdfbox.cos COSDictionary getString

Introduction

In this page you can find the example usage for org.apache.pdfbox.cos COSDictionary getString.

Prototype

public String getString(COSName key)

Source Link

Document

This is a convenience method that will get the dictionary object that is expected to be a string.

Usage

From source file:com.esri.geoportal.commons.pdf.PdfUtils.java

License:Apache License

/**
 * Extracts the geospatial metadata from a GeoPDF
 * /*from   w w  w. j  av  a2s . c  o m*/
 * @param page the PDF page to read geospatial metadata from
 * @param geometryServiceUrl url of a <a href="https://developers.arcgis.com/rest/services-reference/geometry-service.htm">geometry service</a> for reprojecting coordinates. 
 * 
 * @see <a href="https://www.loc.gov/preservation/digital/formats/fdd/fdd000312.shtml">Library of Congress information on GeoPDF</a>
 * @see <a href="https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf">The PDF specification</a>, section 8, for instructions for translating coordinates.
 * 
 * @returns the bounding box of the GeoPDF as "yMin xMin, yMax xMax"
 */
private static String extractGeoPDFProps(PDPage page, String geometryServiceUrl) {

    // The LGI dictionary is an array, we'll loop through all entries and pull the first one for a bounding box
    COSArray lgi = (COSArray) page.getCOSObject().getDictionaryObject("LGIDict");

    List<String> bBoxes = new ArrayList<>();

    lgi.iterator().forEachRemaining(item -> {

        String currentBbox = null;

        // Set up the Coordinate Transformation Matrix (used to translate PDF coords to geo coords)
        Double[][] ctmValues = null;

        COSDictionary dictionary = (COSDictionary) item;
        if (dictionary.containsKey("CTM")) {
            ctmValues = new Double[3][3];

            // The last column in the matrix is always constant
            ctmValues[0][2] = 0.0;
            ctmValues[1][2] = 0.0;
            ctmValues[2][2] = 1.0;

            COSArray ctm = (COSArray) dictionary.getDictionaryObject("CTM");
            for (int i = 0; i < ctm.toList().size(); i += 2) {
                int ctmRow = i / 2;
                ctmValues[ctmRow][0] = Double.parseDouble(((COSString) ctm.get(i)).getString());
                ctmValues[ctmRow][1] = Double.parseDouble(((COSString) ctm.get(i + 1)).getString());
            }
        }

        // Get the neatline (i.e. the bounding box in *PDF* coordinates)
        Double[][] neatLineValues = null;
        int neatLineLength = 0;
        if (dictionary.containsKey("Neatline")) {

            COSArray neatline = (COSArray) dictionary.getDictionaryObject("Neatline");
            neatLineLength = neatline.toList().size();
            neatLineValues = new Double[neatLineLength / 2][3];

            for (int i = 0; i < neatline.toList().size(); i += 2) {
                int neatLineRow = i / 2;
                neatLineValues[neatLineRow][0] = Double.parseDouble(((COSString) neatline.get(i)).getString());
                neatLineValues[neatLineRow][1] = Double
                        .parseDouble(((COSString) neatline.get(i + 1)).getString());
                neatLineValues[neatLineRow][2] = 1.0;
            }
        }

        // Translate the PDF coordinates to Geospatial coordintates by multiplying the two matricies
        MultiPoint mp = new MultiPoint();
        if (ctmValues != null && neatLineValues != null) {
            Double[][] resultCoords = new Double[neatLineLength / 2][3];
            for (int z = 0; z < neatLineLength / 2; z++) {
                for (int i = 0; i < 3; i++) {
                    resultCoords[z][i] = neatLineValues[z][0] * ctmValues[0][i]
                            + neatLineValues[z][1] * ctmValues[1][i] + neatLineValues[z][2] * ctmValues[2][i];
                }
                mp.add(resultCoords[z][0], resultCoords[z][1]);
            }
        }

        // Project the geospatial coordinates to WGS84 for the Dublin-Core metadata
        if (dictionary.containsKey("Projection")) {
            COSDictionary projectionDictionary = (COSDictionary) dictionary.getDictionaryObject("Projection");
            String projectionType = projectionDictionary.getString("ProjectionType");

            try (GeometryService svc = new GeometryService(HttpClients.custom().useSystemProperties().build(),
                    new URL(geometryServiceUrl));) {

                // UTM projections require slightly different processing
                if ("UT".equals(projectionType)) {
                    String zone = Integer.toString(projectionDictionary.getInt("Zone"));
                    String hemisphere = projectionDictionary.getString("Hemisphere");

                    // Get the wkt for the geospatial coordinate system
                    String wkt = datumTranslation(projectionDictionary.getItem("Datum"));

                    if (zone != null && hemisphere != null && wkt != null) {
                        // Generate a list of UTM strings
                        List<String> utmCoords = new ArrayList<>();
                        for (Point2D pt : mp.getCoordinates2D()) {
                            String coord = String.format("%s%s %s %s", zone, hemisphere, Math.round(pt.x),
                                    Math.round(pt.y));
                            utmCoords.add(coord);
                        }

                        MultiPoint reproj = svc.fromGeoCoordinateString(utmCoords, WGS84_WKID);

                        currentBbox = generateBbox(reproj);

                    } else {
                        LOG.warn("Missing UTM argument: zone: {}, hemisphere: {}, datum: {}", zone, hemisphere,
                                wkt);
                        LOG.debug("Projection dictionary {}", projectionDictionary);
                    }
                } else {
                    // Generate Well Known Text for projection and re-projects the points to WGS 84
                    String wkt = getProjectionWKT(projectionDictionary, projectionType);

                    if (wkt != null) {
                        MultiPoint reproj = svc.project(mp, wkt, WGS84_WKID);

                        currentBbox = generateBbox(reproj);

                    } else if (LOG.isDebugEnabled()) {
                        // Print out translated coordinates for debugging purposes
                        LOG.debug("Translated Coordinates");
                        for (Point2D pt : mp.getCoordinates2D()) {
                            LOG.debug(String.format("\t%s, %s", pt.x, pt.y));
                        }
                    }
                }
            } catch (Exception e) {
                // If something goes wrong, just try the next set of coordinates
                LOG.error("Exception reprojecting geometry, skipping this geopdf dictionary instance...", e);
            }
        }

        if (currentBbox != null) {
            bBoxes.add(currentBbox);
        }

    });

    return bBoxes.get(0);
}

From source file:modules.PDFFontDependencyExtractorModule.java

License:Apache License

public PDFFontResults extractFontList(File f) throws IOException, InvalidParameterException {
    PDDocument document;/* ww  w  .  ja  va2  s .c om*/
    try {
        document = PDDocument.load(f);
    } catch (IOException x) {
        throw new InvalidParameterException("Not a PDF file");
    }
    SortedSet<FontInformation> ret = new TreeSet<FontInformation>(new Comparator<FontInformation>() {

        @Override
        public int compare(FontInformation o1, FontInformation o2) {
            int a = o1.fontName.compareTo(o2.fontName);
            if (a != 0)
                return a;
            else
                return o1.fontType.compareTo(o2.fontType);
        }

    });

    document.getDocumentCatalog().getAllPages();
    // The code down here is easier as it gets all the fonts used in the
    // document. Still, this would inlcude unused fonts, so we get the fonts
    // page by page and add them to a Hash table.
    for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) {
        if (c == null || !(c.getObject() instanceof COSDictionary)) {
            continue;
            // System.out.println(c.getObject());
        }

        COSDictionary fontDictionary = (COSDictionary) c.getObject();
        // System.out.println(dic.getNameAsString(COSName.BASE_FONT));
        // }
        // }
        // int pagen = document.getNumberOfPages();
        // i=0;
        // for (int p=0;p<pagen;p++){
        // PDPage page = (PDPage)pages.get(p);
        // PDResources res = page.findResources();
        // //for each page resources
        // if (res==null) continue;
        // // get the font dictionary
        // COSDictionary fonts = (COSDictionary)
        // res.getCOSDictionary().getDictionaryObject( COSName.FONT );
        // for( COSName fontName : fonts.keySet() ) {
        // COSObject font = (COSObject) fonts.getItem( fontName );
        // // if the font has already been visited we ingore it
        // long objectId = font.getObjectNumber().longValue();
        // if (ret.get(objectId)!=null)
        // continue;
        // if( font==null || ! (font.getObject() instanceof COSDictionary) )
        // continue;
        // COSDictionary fontDictionary = (COSDictionary)font.getObject();

        // Type MUSt be font
        if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font")) {
            continue;
        }
        // get the variables
        FontInformation fi = new FontInformation();
        fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE);

        String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT);
        if (baseFont == null) {
            continue;
        }
        if (Arrays.binarySearch(standard14, baseFont) >= 0) {
            continue;
        }
        COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC);
        COSBase enc = fontDictionary.getItem(COSName.ENCODING);
        COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE);
        fontDictionary.getInt(COSName.FIRST_CHAR);
        fontDictionary.getInt(COSName.LAST_CHAR);
        String encoding;
        boolean toUnicode = uni != null;
        if (enc == null) {
            encoding = "standard14";
        }
        if (enc instanceof COSString) {
            encoding = ((COSString) enc).getString();
        } else {
            encoding = "table";
        }
        fi.isSubset = false;
        boolean t = true;
        // Type one and TT can have subsets defineing the basename see 5.5.3
        // pdfref 1.6
        // if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 ||
        // fi.fontType.equals(COSName.TRUE_TYPE.getName()) )
        if (baseFont != null) {
            if (baseFont.length() > 6) {
                for (int k = 0; k < 6; k++)
                    if (!Character.isUpperCase(baseFont.charAt(k))) {
                        t = false;
                    }
                if (baseFont.charAt(6) != '+') {
                    t = false;
                }
            } else {
                t = false;
            }
            fi.isSubset = t;
            if (fi.isSubset) {
                fi.baseName = baseFont.substring(0, 6);
                baseFont = baseFont.substring(7);
            }
        }
        fi.fontFlags = 0;
        if (fi.fontType.equals(COSName.TYPE0.getName()) || fi.fontType.equals(COSName.TYPE3.getName())) {
            fi.isEmbedded = true;
        }

        if (fontDescriptor != null) {
            // in Type1 charset indicates font is subsetted
            if (fontDescriptor.getItem(COSName.CHAR_SET) != null) {
                fi.isSubset = true;
            }
            if (fontDescriptor.getItem(COSName.FONT_FILE) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE3) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE2) != null) {
                fi.isEmbedded = true;
            }
            fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags"));
            fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY);
            fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH);

        }
        fi.charset = encoding;
        fi.fontName = baseFont;
        fi.isToUnicode = toUnicode;
        fi.encoding = fontDictionary.getNameAsString(COSName.CID_TO_GID_MAP);

        ret.add(fi);

    } // for all fonts

    HashMultimap<String, FontInformation> m = HashMultimap.create();

    for (FontInformation ff : ret) {
        m.put(ff.fontName, ff);
    }
    LinkedList<FontInformation> missing = new LinkedList<FontInformation>();
    Set<String> k = m.keySet();
    for (String kk : k) {
        Set<FontInformation> s = m.get(kk);
        if (s.size() < 1) {
            continue;
        }
        if (s.size() > 1) {
            boolean found = false;
            FontInformation ff = null;
            for (FontInformation fonti : s) {
                if (!fonti.isEmbedded) {
                    ff = fonti;
                } else {
                    found = true;
                }
            }
            if (!found) {
                missing.add(ff);
            }
        } else {
            FontInformation ff = s.iterator().next();
            if (!ff.isEmbedded) {
                missing.add(ff);
            }
        }

    }

    // } // for all pages
    // Iterator<FontInformation> it = ret.iterator();
    // FontInformation prev = null;
    // LinkedList<FontInformation> toDelete = new
    // LinkedList<FontInformation>();
    // while (it.hasNext()) {
    // FontInformation current = it.next();
    //
    // if (prev!= null && prev.fontName.equals(current.fontName) &&
    // (prev.fontType.startsWith("CIDFontType") ||
    // current.fontType.startsWith("CIDFontType")))
    // toDelete.add(current);
    // prev = current;
    // }
    //
    // //ret.removeAll(toDelete);
    // FontInformation[] retArray =toDelete.toArray(new FontInformation[0]);
    //

    if (missing.size() == 0) {
        missing = null;
    } else {
        System.out.println("Found missing fonts: " + f);
        System.out.println(missing);
    }
    return new PDFFontResults(new LinkedList<FontInformation>(ret), missing);
}

From source file:net.padaf.preflight.font.Type3FontValidator.java

License:Apache License

/**
 * If the Resources entry is present, this method check its content. Only
 * fonts and Images are checked because this resource describes glyphs. REMARK
 * : The font and the image aren't validated because they will be validated by
 * an other ValidationHelper.//from   w w w. ja  v  a  2s  .c om
 * 
 * @return
 */
private boolean checkResources() throws ValidationException {
    if (this.resources == null) {
        // ---- No resources dictionary.
        return true;
    }

    COSDocument cDoc = this.handler.getDocument().getDocument();
    COSDictionary dictionary = COSUtils.getAsDictionary(this.resources, cDoc);
    if (dictionary == null) {
        this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                "The Resources element isn't a dictionary"));
        return false;
    }

    COSBase cbImg = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_XOBJECT));
    COSBase cbFont = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_FONT));

    if (cbImg == null && cbFont == null) {
        this.fontContainer.addError(new ValidationError(ERROR_FONTS_TYPE3_DAMAGED,
                "The Resources element doesn't have Glyph information"));
        return false;
    }

    if (cbImg != null) {
        // ---- the referenced objects must be present in the PDF file
        COSDictionary dicImgs = COSUtils.getAsDictionary(cbImg, cDoc);
        Set<COSName> keyList = dicImgs.keySet();
        for (Object key : keyList) {

            COSBase item = dictionary.getItem((COSName) key);
            COSDictionary xObjImg = COSUtils.getAsDictionary(item, cDoc);
            if (xObjImg == null) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }

            if (!XOBJECT_DICTIONARY_VALUE_SUBTYPE_IMG
                    .equals(xObjImg.getString(COSName.getPDFName(DICTIONARY_KEY_SUBTYPE)))) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }
        }
    }

    if (cbFont != null) {
        // ---- the referenced object must be present in the PDF file
        COSDictionary dicFonts = COSUtils.getAsDictionary(cbFont, cDoc);
        Set<COSName> keyList = dicFonts.keySet();
        for (Object key : keyList) {

            COSBase item = dictionary.getItem((COSName) key);
            COSDictionary xObjFont = COSUtils.getAsDictionary(item, cDoc);
            if (xObjFont == null) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }

            if (!FONT_DICTIONARY_VALUE_FONT
                    .equals(xObjFont.getString(COSName.getPDFName(DICTIONARY_KEY_TYPE)))) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }

            try {
                PDFont aFont = PDFontFactory.createFont(xObjFont);
                // FontContainer aContainer = this.handler.retrieveFontContainer(aFont);
                AbstractFontContainer aContainer = this.handler.getFont(aFont.getCOSObject());
                // ---- another font is used in the Type3, check if the font is valid.
                if (aContainer.isValid() != State.VALID) {
                    this.fontContainer.addError(new ValidationError(ERROR_FONTS_TYPE3_DAMAGED,
                            "The Resources dictionary of type 3 font contains invalid font"));
                    return false;
                }
            } catch (IOException e) {
                throw new ValidationException("Unable to valid the Type3 : " + e.getMessage());
            }
        }
    }

    List<ValidationError> errors = new ArrayList<ValidationError>();
    ExtGStateContainer extGStates = new ExtGStateContainer(dictionary, cDoc);
    boolean res = extGStates.validateTransparencyRules(errors);
    for (ValidationError err : errors) {
        this.fontContainer.addError(err);
    }

    return res && validateShadingPattern(dictionary, errors);
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * This method checks the content of each OutputIntent. The S entry must
 * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile
 * Stream.//from  w  ww. j  a v  a  2  s. co m
 * 
 * If there are more than one OutputIntent, they have to use the same ICC
 * Profile.
 * 
 * This method returns a list of ValidationError. It is empty if no errors
 * have been found.
 * 
 * @param handler
 * @return
 * @throws ValidationException
 */
public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException {
    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdDocument = handler.getDocument();
    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
    COSDocument cDoc = pdDocument.getDocument();

    COSBase cBase = catalog.getCOSDictionary()
            .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS));
    COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc);

    Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>();

    for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) {
        COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc);

        if (dictionary == null) {

            result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                    "OutputIntent object is null or isn't a dictionary"));

        } else {
            // ---- S entry is mandatory and must be equals to GTS_PDFA1
            String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S));
            if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID,
                        "The S entry of the OutputIntent isn't GTS_PDFA1"));
                continue;
            }

            // ---- OutputConditionIdentifier is a mandatory field
            String outputConditionIdentifier = dictionary
                    .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER));
            if (outputConditionIdentifier == null || "".equals(outputConditionIdentifier)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                        "The OutputIntentCondition is missing"));
                continue;
            }

            // ---- If OutputConditionIdentifier is "Custom" :
            // ---- DestOutputProfile and Info are mandatory
            // ---- DestOutputProfile must be a ICC Profile

            // ---- Because of PDF/A conforming file needs to specify the color
            // characteristics, the DestOutputProfile
            // is checked even if the OutputConditionIdentifier isn't "Custom"
            COSBase dop = dictionary
                    .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE));
            ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler);
            if (valer != null) {
                result.add(valer);
                continue;
            }

            if (OUTPUT_INTENT_DICTIONARY_VALUE_OUTPUT_CONDITION_IDENTIFIER_CUSTOM
                    .equals(outputConditionIdentifier)) {
                String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO));
                if (info == null || "".equals(info)) {
                    result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                            "The Info entry of a OutputIntent dictionary is missing"));
                    continue;
                }
            }
        }
    }
    return result;
}

From source file:net.sf.jabref.logic.xmp.XMPUtil.java

License:Open Source License

/**
 * Helper function for retrieving a BibEntry from the
 * PDDocumentInformation in a PDF file.//from  ww w  .j  ava 2  s  .  c  o m
 *
 * To understand how to get hold of a PDDocumentInformation have a look in
 * the test cases for XMPUtil.
 *
 * The BibEntry is build by mapping individual fields in the document
 * information (like author, title, keywords) to fields in a bibtex entry.
 *
 * @param di
 *            The document information from which to build a BibEntry.
 *
 * @return The bibtex entry found in the document information.
 */
public static Optional<BibEntry> getBibtexEntryFromDocumentInformation(PDDocumentInformation di) {

    BibEntry entry = new BibEntry();
    entry.setType("misc");

    String s = di.getAuthor();
    if (s != null) {
        entry.setField("author", s);
    }

    s = di.getTitle();
    if (s != null) {
        entry.setField("title", s);
    }

    s = di.getKeywords();
    if (s != null) {
        entry.setField("keywords", s);
    }

    s = di.getSubject();
    if (s != null) {
        entry.setField("abstract", s);
    }

    COSDictionary dict = di.getDictionary();
    for (Map.Entry<COSName, COSBase> o : dict.entrySet()) {
        String key = o.getKey().getName();
        if (key.startsWith("bibtex/")) {
            String value = dict.getString(key);
            key = key.substring("bibtex/".length());
            if ("entrytype".equals(key)) {
                entry.setType(value);
            } else {
                entry.setField(key, value);
            }
        }
    }

    // Return empty Optional if no values were found
    return entry.getFieldNames().isEmpty() ? Optional.empty() : Optional.of(entry);
}

From source file:net.sf.jabref.util.XMPUtil.java

License:Open Source License

/**
 * Helper function for retrieving a BibtexEntry from the
 * PDDocumentInformation in a PDF file./*w  ww.j  a  v  a 2 s  .  c  o  m*/
 * 
 * To understand how to get hold of a PDDocumentInformation have a look in
 * the test cases for XMPUtil.
 * 
 * The BibtexEntry is build by mapping individual fields in the document
 * information (like author, title, keywords) to fields in a bibtex entry.
 * 
 * @param di
 *            The document information from which to build a BibtexEntry.
 * 
 * @return The bibtex entry found in the document information.
 */
@SuppressWarnings("unchecked")
public static BibtexEntry getBibtexEntryFromDocumentInformation(PDDocumentInformation di) {

    BibtexEntry entry = new BibtexEntry();

    String s = di.getAuthor();
    if (s != null) {
        entry.setField("author", s);
    }

    s = di.getTitle();
    if (s != null) {
        entry.setField("title", s);
    }

    s = di.getKeywords();
    if (s != null) {
        entry.setField("keywords", s);
    }

    s = di.getSubject();
    if (s != null) {
        entry.setField("abstract", s);
    }

    COSDictionary dict = di.getDictionary();
    for (Map.Entry<COSName, COSBase> o : dict.entrySet()) {
        String key = o.getKey().getName();
        if (key.startsWith("bibtex/")) {
            String value = dict.getString(key);
            key = key.substring("bibtex/".length());
            if (key.equals("entrytype")) {
                BibtexEntryType type = BibtexEntryType.getStandardType(value);
                if (type != null) {
                    entry.setType(type);
                }
            } else {
                entry.setField(key, value);
            }
        }
    }

    // Return null if no values were found
    return (!entry.getAllFields().isEmpty() ? entry : null);
}

From source file:org.apache.padaf.preflight.font.Type3FontValidator.java

License:Apache License

/**
 * If the Resources entry is present, this method check its content. Only
 * fonts and Images are checked because this resource describes glyphs. REMARK
 * : The font and the image aren't validated because they will be validated by
 * an other ValidationHelper./* w ww .j ava 2s  . c  om*/
 * 
 * @return
 */
private boolean checkResources() throws ValidationException {
    if (this.resources == null) {
        // ---- No resources dictionary.
        return true;
    }

    COSDocument cDoc = this.handler.getDocument().getDocument();
    COSDictionary dictionary = COSUtils.getAsDictionary(this.resources, cDoc);
    if (dictionary == null) {
        this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                "The Resources element isn't a dictionary"));
        return false;
    }

    COSBase cbImg = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_XOBJECT));
    COSBase cbFont = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_FONT));

    if (cbImg != null) {
        // ---- the referenced objects must be present in the PDF file
        COSDictionary dicImgs = COSUtils.getAsDictionary(cbImg, cDoc);
        Set<COSName> keyList = dicImgs.keySet();
        for (Object key : keyList) {

            COSBase item = dictionary.getItem((COSName) key);
            COSDictionary xObjImg = COSUtils.getAsDictionary(item, cDoc);
            if (xObjImg == null) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }

            if (!XOBJECT_DICTIONARY_VALUE_SUBTYPE_IMG
                    .equals(xObjImg.getString(COSName.getPDFName(DICTIONARY_KEY_SUBTYPE)))) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }
        }
    }

    if (cbFont != null) {
        // ---- the referenced object must be present in the PDF file
        COSDictionary dicFonts = COSUtils.getAsDictionary(cbFont, cDoc);
        Set<COSName> keyList = dicFonts.keySet();
        for (Object key : keyList) {

            COSBase item = dictionary.getItem((COSName) key);
            COSDictionary xObjFont = COSUtils.getAsDictionary(item, cDoc);
            if (xObjFont == null) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }

            if (!FONT_DICTIONARY_VALUE_FONT
                    .equals(xObjFont.getString(COSName.getPDFName(DICTIONARY_KEY_TYPE)))) {
                this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID,
                        "The Resources dictionary of type 3 font is invalid"));
                return false;
            }

            try {
                PDFont aFont = PDFontFactory.createFont(xObjFont);
                // FontContainer aContainer = this.handler.retrieveFontContainer(aFont);
                AbstractFontContainer aContainer = this.handler.getFont(aFont.getCOSObject());
                // ---- another font is used in the Type3, check if the font is valid.
                if (aContainer.isValid() != State.VALID) {
                    this.fontContainer.addError(new ValidationError(ERROR_FONTS_TYPE3_DAMAGED,
                            "The Resources dictionary of type 3 font contains invalid font"));
                    return false;
                }
            } catch (IOException e) {
                throw new ValidationException("Unable to valid the Type3 : " + e.getMessage());
            }
        }
    }

    List<ValidationError> errors = new ArrayList<ValidationError>();
    ExtGStateContainer extGStates = new ExtGStateContainer(dictionary, cDoc);
    boolean res = extGStates.validateTransparencyRules(errors);
    for (ValidationError err : errors) {
        this.fontContainer.addError(err);
    }

    return res && validateShadingPattern(dictionary, errors);
}

From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * This method checks the content of each OutputIntent. The S entry must
 * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile
 * Stream./*from   ww w.  jav  a 2 s . c  o m*/
 * 
 * If there are more than one OutputIntent, they have to use the same ICC
 * Profile.
 * 
 * This method returns a list of ValidationError. It is empty if no errors
 * have been found.
 * 
 * @param handler
 * @return
 * @throws ValidationException
 */
public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException {
    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdDocument = handler.getDocument();
    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
    COSDocument cDoc = pdDocument.getDocument();

    COSBase cBase = catalog.getCOSDictionary()
            .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS));
    COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc);

    Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>();

    for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) {
        COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc);

        if (dictionary == null) {

            result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                    "OutputIntent object is null or isn't a dictionary"));

        } else {
            // ---- S entry is mandatory and must be equals to GTS_PDFA1
            String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S));
            if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID,
                        "The S entry of the OutputIntent isn't GTS_PDFA1"));
                continue;
            }

            // ---- OutputConditionIdentifier is a mandatory field
            String outputConditionIdentifier = dictionary
                    .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER));
            if (outputConditionIdentifier == null) {// empty string is autorized (it may be an application specific value)
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                        "The OutputIntentCondition is missing"));
                continue;
            }

            // ---- If OutputConditionIdentifier is "Custom" or a non Standard ICC Characterization :
            // ---- DestOutputProfile and Info are mandatory
            // ---- DestOutputProfile must be a ICC Profile

            // ---- Because of PDF/A conforming file needs to specify the color characteristics, the DestOutputProfile
            // ---- is checked even if the OutputConditionIdentifier isn't "Custom"
            COSBase dop = dictionary
                    .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE));
            ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler);
            if (valer != null) {
                result.add(valer);
                continue;
            }

            // TODO [LAZY] When Lazy mode will be added, this block should be uncommented to set result as warning.
            //            if (!isStandardICCCharacterization(outputConditionIdentifier)) {
            //               String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO));
            //               if (info == null || "".equals(info)) {
            //                  result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
            //                        "The Info entry of a OutputIntent dictionary is missing"));
            //                  continue;
            //               }
            //            }
        }
    }
    return result;
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Helper function for retrieving a BibtexEntry from the
 * PDDocumentInformation in a PDF file.//www  . j a va  2  s.c o  m
 * 
 * To understand how to get hold of a PDDocumentInformation have a look in
 * the test cases for XMPUtil.
 * 
 * The BibtexEntry is build by mapping individual fields in the document
 * information (like author, title, keywords) to fields in a bibtex entry.
 * 
 * @param di
 *            The document information from which to build a BibtexEntry.
 * 
 * @return The bibtex entry found in the document information.
 */
@SuppressWarnings("unchecked")
public static BibtexEntry getBibtexEntryFromDocumentInformation(PDDocumentInformation di) {

    BibtexEntry entry = new BibtexEntry();

    String s = di.getAuthor();
    if (s != null)
        entry.setField("author", s);

    s = di.getTitle();
    if (s != null)
        entry.setField("title", s);

    s = di.getKeywords();
    if (s != null)
        entry.setField("keywords", s);

    s = di.getSubject();
    if (s != null)
        entry.setField("abstract", s);

    COSDictionary dict = di.getDictionary();
    for (Map.Entry<COSName, COSBase> o : dict.entrySet()) {
        String key = o.getKey().getName();
        if (key.startsWith("bibtex/")) {
            String value = dict.getString(key);
            key = key.substring("bibtex/".length());
            if (key.equals("entrytype")) {
                BibtexEntryType type = BibtexEntryType.getStandardType(value);
                if (type != null)
                    entry.setType(type);
            } else
                entry.setField(key, value);
        }
    }

    // Return null if no values were found
    return (entry.getAllFields().size() > 0 ? entry : null);
}

From source file:uk.ac.liverpool.thumbnails.PDFService.java

License:Open Source License

@Override
public FontInformation[] extractFontList(URI u, File fff) throws MalformedURLException, IOException {

    SortedSet<FontInformation> ret = new TreeSet<FontInformation>();
    PDDocument document = getPages(u, fff);
    List pages = document.getDocumentCatalog().getAllPages();
    int i = 0;/* w w w.  j ava 2s.  co  m*/
    // The code down here is easier as it gets all the fonts used in the document. Still, this would inlcude unused fonts, so we get the fonts page by page and add them to a Hash table.
    for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) {
        if (c == null || !(c.getObject() instanceof COSDictionary))
            continue;
        //System.out.println(c.getObject());

        COSDictionary fontDictionary = (COSDictionary) c.getObject();
        // System.out.println(dic.getNameAsString(COSName.BASE_FONT));
        //            }
        //        }
        //        int pagen = document.getNumberOfPages();
        //        i=0;
        //        for (int p=0;p<pagen;p++){
        //            PDPage page = (PDPage)pages.get(p);
        //            PDResources res = page.findResources();
        //            //for each page resources
        //            if (res==null) continue; 
        //            // get the font dictionary
        //            COSDictionary fonts = (COSDictionary) res.getCOSDictionary().getDictionaryObject( COSName.FONT );
        //            for( COSName fontName : fonts.keySet() ) {
        //                COSObject font = (COSObject) fonts.getItem( fontName );
        //                // if the font has already been visited we ingore it
        //                long objectId = font.getObjectNumber().longValue();
        //                if (ret.get(objectId)!=null)
        //                    continue;
        //                if( font==null ||  ! (font.getObject() instanceof COSDictionary) )
        //                    continue;
        //                COSDictionary fontDictionary = (COSDictionary)font.getObject();

        // Type MUSt be font
        if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font"))
            continue;
        // get the variables
        FontInformation fi = new FontInformation();
        fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE);

        String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT);
        if (baseFont == null)
            continue;
        if (Arrays.binarySearch(standard14, baseFont) >= 0)
            continue;
        COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC);
        COSBase enc = fontDictionary.getItem(COSName.ENCODING);
        COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE);
        int firstChar = fontDictionary.getInt(COSName.FIRST_CHAR);
        int lastChar = fontDictionary.getInt(COSName.LAST_CHAR);
        String encoding;
        boolean toUnicode = uni != null;
        if (enc == null) {
            encoding = "standard14";
        }
        if (enc instanceof COSString) {
            encoding = ((COSString) enc).getString();
        } else {
            encoding = "table";
        }
        fi.isSubset = false;
        boolean t = true;
        // Type one and TT can have subsets defineing the basename see 5.5.3 pdfref 1.6
        //  if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 || fi.fontType.equals(COSName.TRUE_TYPE.getName()) )
        if (baseFont != null) {
            if (baseFont.length() > 6) {
                for (int k = 0; k < 6; k++)
                    if (!Character.isUpperCase(baseFont.charAt(k)))
                        t = false;
                if (baseFont.charAt(6) != '+')
                    t = false;
            } else
                t = false;
            fi.isSubset = t;
            if (fi.isSubset)
                baseFont = baseFont.substring(7);
        }
        fi.fontFlags = 0;
        if (fi.fontType.equals(COSName.TYPE0) || fi.fontType.equals(COSName.TYPE3))
            fi.isEmbedded = true;

        if (fontDescriptor != null) {
            // in Type1 charset indicates font is subsetted
            if (fontDescriptor.getItem(COSName.CHAR_SET) != null)
                fi.isSubset = true;
            if (fontDescriptor.getItem(COSName.FONT_FILE) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE3) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE2) != null)
                fi.isEmbedded = true;
            fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags"));
            fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY);
            fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH);
        }
        fi.charset = encoding;
        fi.fontName = baseFont;
        fi.isToUnicode = toUnicode;

        ret.add(fi);

    } // for all fonts 

    //    } // for all pages
    Iterator<FontInformation> it = ret.iterator();
    FontInformation prev = null;
    LinkedList<FontInformation> toDelete = new LinkedList<FontInformation>();
    while (it.hasNext()) {
        FontInformation current = it.next();

        if (prev != null && prev.fontName.equals(current.fontName) && prev.fontType.startsWith("CIDFontType"))
            toDelete.add(current);
        prev = current;
    }
    ret.removeAll(toDelete);
    FontInformation[] retArray = ret.toArray(new FontInformation[0]);

    return retArray;
}