List of usage examples for org.apache.pdfbox.cos COSDictionary getString
public String getString(COSName key)
From source file:com.esri.geoportal.commons.pdf.PdfUtils.java
License:Apache License
/** * Extracts the geospatial metadata from a GeoPDF * /*from w w w. j av a2s . c o m*/ * @param page the PDF page to read geospatial metadata from * @param geometryServiceUrl url of a <a href="https://developers.arcgis.com/rest/services-reference/geometry-service.htm">geometry service</a> for reprojecting coordinates. * * @see <a href="https://www.loc.gov/preservation/digital/formats/fdd/fdd000312.shtml">Library of Congress information on GeoPDF</a> * @see <a href="https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf">The PDF specification</a>, section 8, for instructions for translating coordinates. * * @returns the bounding box of the GeoPDF as "yMin xMin, yMax xMax" */ private static String extractGeoPDFProps(PDPage page, String geometryServiceUrl) { // The LGI dictionary is an array, we'll loop through all entries and pull the first one for a bounding box COSArray lgi = (COSArray) page.getCOSObject().getDictionaryObject("LGIDict"); List<String> bBoxes = new ArrayList<>(); lgi.iterator().forEachRemaining(item -> { String currentBbox = null; // Set up the Coordinate Transformation Matrix (used to translate PDF coords to geo coords) Double[][] ctmValues = null; COSDictionary dictionary = (COSDictionary) item; if (dictionary.containsKey("CTM")) { ctmValues = new Double[3][3]; // The last column in the matrix is always constant ctmValues[0][2] = 0.0; ctmValues[1][2] = 0.0; ctmValues[2][2] = 1.0; COSArray ctm = (COSArray) dictionary.getDictionaryObject("CTM"); for (int i = 0; i < ctm.toList().size(); i += 2) { int ctmRow = i / 2; ctmValues[ctmRow][0] = Double.parseDouble(((COSString) ctm.get(i)).getString()); ctmValues[ctmRow][1] = Double.parseDouble(((COSString) ctm.get(i + 1)).getString()); } } // Get the neatline (i.e. the bounding box in *PDF* coordinates) Double[][] neatLineValues = null; int neatLineLength = 0; if (dictionary.containsKey("Neatline")) { COSArray neatline = (COSArray) dictionary.getDictionaryObject("Neatline"); neatLineLength = neatline.toList().size(); neatLineValues = new Double[neatLineLength / 2][3]; for (int i = 0; i < neatline.toList().size(); i += 2) { int neatLineRow = i / 2; neatLineValues[neatLineRow][0] = Double.parseDouble(((COSString) neatline.get(i)).getString()); neatLineValues[neatLineRow][1] = Double .parseDouble(((COSString) neatline.get(i + 1)).getString()); neatLineValues[neatLineRow][2] = 1.0; } } // Translate the PDF coordinates to Geospatial coordintates by multiplying the two matricies MultiPoint mp = new MultiPoint(); if (ctmValues != null && neatLineValues != null) { Double[][] resultCoords = new Double[neatLineLength / 2][3]; for (int z = 0; z < neatLineLength / 2; z++) { for (int i = 0; i < 3; i++) { resultCoords[z][i] = neatLineValues[z][0] * ctmValues[0][i] + neatLineValues[z][1] * ctmValues[1][i] + neatLineValues[z][2] * ctmValues[2][i]; } mp.add(resultCoords[z][0], resultCoords[z][1]); } } // Project the geospatial coordinates to WGS84 for the Dublin-Core metadata if (dictionary.containsKey("Projection")) { COSDictionary projectionDictionary = (COSDictionary) dictionary.getDictionaryObject("Projection"); String projectionType = projectionDictionary.getString("ProjectionType"); try (GeometryService svc = new GeometryService(HttpClients.custom().useSystemProperties().build(), new URL(geometryServiceUrl));) { // UTM projections require slightly different processing if ("UT".equals(projectionType)) { String zone = Integer.toString(projectionDictionary.getInt("Zone")); String hemisphere = projectionDictionary.getString("Hemisphere"); // Get the wkt for the geospatial coordinate system String wkt = datumTranslation(projectionDictionary.getItem("Datum")); if (zone != null && hemisphere != null && wkt != null) { // Generate a list of UTM strings List<String> utmCoords = new ArrayList<>(); for (Point2D pt : mp.getCoordinates2D()) { String coord = String.format("%s%s %s %s", zone, hemisphere, Math.round(pt.x), Math.round(pt.y)); utmCoords.add(coord); } MultiPoint reproj = svc.fromGeoCoordinateString(utmCoords, WGS84_WKID); currentBbox = generateBbox(reproj); } else { LOG.warn("Missing UTM argument: zone: {}, hemisphere: {}, datum: {}", zone, hemisphere, wkt); LOG.debug("Projection dictionary {}", projectionDictionary); } } else { // Generate Well Known Text for projection and re-projects the points to WGS 84 String wkt = getProjectionWKT(projectionDictionary, projectionType); if (wkt != null) { MultiPoint reproj = svc.project(mp, wkt, WGS84_WKID); currentBbox = generateBbox(reproj); } else if (LOG.isDebugEnabled()) { // Print out translated coordinates for debugging purposes LOG.debug("Translated Coordinates"); for (Point2D pt : mp.getCoordinates2D()) { LOG.debug(String.format("\t%s, %s", pt.x, pt.y)); } } } } catch (Exception e) { // If something goes wrong, just try the next set of coordinates LOG.error("Exception reprojecting geometry, skipping this geopdf dictionary instance...", e); } } if (currentBbox != null) { bBoxes.add(currentBbox); } }); return bBoxes.get(0); }
From source file:modules.PDFFontDependencyExtractorModule.java
License:Apache License
public PDFFontResults extractFontList(File f) throws IOException, InvalidParameterException { PDDocument document;/* ww w . ja va2 s .c om*/ try { document = PDDocument.load(f); } catch (IOException x) { throw new InvalidParameterException("Not a PDF file"); } SortedSet<FontInformation> ret = new TreeSet<FontInformation>(new Comparator<FontInformation>() { @Override public int compare(FontInformation o1, FontInformation o2) { int a = o1.fontName.compareTo(o2.fontName); if (a != 0) return a; else return o1.fontType.compareTo(o2.fontType); } }); document.getDocumentCatalog().getAllPages(); // The code down here is easier as it gets all the fonts used in the // document. Still, this would inlcude unused fonts, so we get the fonts // page by page and add them to a Hash table. for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) { if (c == null || !(c.getObject() instanceof COSDictionary)) { continue; // System.out.println(c.getObject()); } COSDictionary fontDictionary = (COSDictionary) c.getObject(); // System.out.println(dic.getNameAsString(COSName.BASE_FONT)); // } // } // int pagen = document.getNumberOfPages(); // i=0; // for (int p=0;p<pagen;p++){ // PDPage page = (PDPage)pages.get(p); // PDResources res = page.findResources(); // //for each page resources // if (res==null) continue; // // get the font dictionary // COSDictionary fonts = (COSDictionary) // res.getCOSDictionary().getDictionaryObject( COSName.FONT ); // for( COSName fontName : fonts.keySet() ) { // COSObject font = (COSObject) fonts.getItem( fontName ); // // if the font has already been visited we ingore it // long objectId = font.getObjectNumber().longValue(); // if (ret.get(objectId)!=null) // continue; // if( font==null || ! (font.getObject() instanceof COSDictionary) ) // continue; // COSDictionary fontDictionary = (COSDictionary)font.getObject(); // Type MUSt be font if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font")) { continue; } // get the variables FontInformation fi = new FontInformation(); fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE); String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT); if (baseFont == null) { continue; } if (Arrays.binarySearch(standard14, baseFont) >= 0) { continue; } COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC); COSBase enc = fontDictionary.getItem(COSName.ENCODING); COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE); fontDictionary.getInt(COSName.FIRST_CHAR); fontDictionary.getInt(COSName.LAST_CHAR); String encoding; boolean toUnicode = uni != null; if (enc == null) { encoding = "standard14"; } if (enc instanceof COSString) { encoding = ((COSString) enc).getString(); } else { encoding = "table"; } fi.isSubset = false; boolean t = true; // Type one and TT can have subsets defineing the basename see 5.5.3 // pdfref 1.6 // if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 || // fi.fontType.equals(COSName.TRUE_TYPE.getName()) ) if (baseFont != null) { if (baseFont.length() > 6) { for (int k = 0; k < 6; k++) if (!Character.isUpperCase(baseFont.charAt(k))) { t = false; } if (baseFont.charAt(6) != '+') { t = false; } } else { t = false; } fi.isSubset = t; if (fi.isSubset) { fi.baseName = baseFont.substring(0, 6); baseFont = baseFont.substring(7); } } fi.fontFlags = 0; if (fi.fontType.equals(COSName.TYPE0.getName()) || fi.fontType.equals(COSName.TYPE3.getName())) { fi.isEmbedded = true; } if (fontDescriptor != null) { // in Type1 charset indicates font is subsetted if (fontDescriptor.getItem(COSName.CHAR_SET) != null) { fi.isSubset = true; } if (fontDescriptor.getItem(COSName.FONT_FILE) != null || fontDescriptor.getItem(COSName.FONT_FILE3) != null || fontDescriptor.getItem(COSName.FONT_FILE2) != null) { fi.isEmbedded = true; } fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags")); fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY); fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH); } fi.charset = encoding; fi.fontName = baseFont; fi.isToUnicode = toUnicode; fi.encoding = fontDictionary.getNameAsString(COSName.CID_TO_GID_MAP); ret.add(fi); } // for all fonts HashMultimap<String, FontInformation> m = HashMultimap.create(); for (FontInformation ff : ret) { m.put(ff.fontName, ff); } LinkedList<FontInformation> missing = new LinkedList<FontInformation>(); Set<String> k = m.keySet(); for (String kk : k) { Set<FontInformation> s = m.get(kk); if (s.size() < 1) { continue; } if (s.size() > 1) { boolean found = false; FontInformation ff = null; for (FontInformation fonti : s) { if (!fonti.isEmbedded) { ff = fonti; } else { found = true; } } if (!found) { missing.add(ff); } } else { FontInformation ff = s.iterator().next(); if (!ff.isEmbedded) { missing.add(ff); } } } // } // for all pages // Iterator<FontInformation> it = ret.iterator(); // FontInformation prev = null; // LinkedList<FontInformation> toDelete = new // LinkedList<FontInformation>(); // while (it.hasNext()) { // FontInformation current = it.next(); // // if (prev!= null && prev.fontName.equals(current.fontName) && // (prev.fontType.startsWith("CIDFontType") || // current.fontType.startsWith("CIDFontType"))) // toDelete.add(current); // prev = current; // } // // //ret.removeAll(toDelete); // FontInformation[] retArray =toDelete.toArray(new FontInformation[0]); // if (missing.size() == 0) { missing = null; } else { System.out.println("Found missing fonts: " + f); System.out.println(missing); } return new PDFFontResults(new LinkedList<FontInformation>(ret), missing); }
From source file:net.padaf.preflight.font.Type3FontValidator.java
License:Apache License
/** * If the Resources entry is present, this method check its content. Only * fonts and Images are checked because this resource describes glyphs. REMARK * : The font and the image aren't validated because they will be validated by * an other ValidationHelper.//from w w w. ja v a 2s .c om * * @return */ private boolean checkResources() throws ValidationException { if (this.resources == null) { // ---- No resources dictionary. return true; } COSDocument cDoc = this.handler.getDocument().getDocument(); COSDictionary dictionary = COSUtils.getAsDictionary(this.resources, cDoc); if (dictionary == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources element isn't a dictionary")); return false; } COSBase cbImg = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_XOBJECT)); COSBase cbFont = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_FONT)); if (cbImg == null && cbFont == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_TYPE3_DAMAGED, "The Resources element doesn't have Glyph information")); return false; } if (cbImg != null) { // ---- the referenced objects must be present in the PDF file COSDictionary dicImgs = COSUtils.getAsDictionary(cbImg, cDoc); Set<COSName> keyList = dicImgs.keySet(); for (Object key : keyList) { COSBase item = dictionary.getItem((COSName) key); COSDictionary xObjImg = COSUtils.getAsDictionary(item, cDoc); if (xObjImg == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } if (!XOBJECT_DICTIONARY_VALUE_SUBTYPE_IMG .equals(xObjImg.getString(COSName.getPDFName(DICTIONARY_KEY_SUBTYPE)))) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } } } if (cbFont != null) { // ---- the referenced object must be present in the PDF file COSDictionary dicFonts = COSUtils.getAsDictionary(cbFont, cDoc); Set<COSName> keyList = dicFonts.keySet(); for (Object key : keyList) { COSBase item = dictionary.getItem((COSName) key); COSDictionary xObjFont = COSUtils.getAsDictionary(item, cDoc); if (xObjFont == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } if (!FONT_DICTIONARY_VALUE_FONT .equals(xObjFont.getString(COSName.getPDFName(DICTIONARY_KEY_TYPE)))) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } try { PDFont aFont = PDFontFactory.createFont(xObjFont); // FontContainer aContainer = this.handler.retrieveFontContainer(aFont); AbstractFontContainer aContainer = this.handler.getFont(aFont.getCOSObject()); // ---- another font is used in the Type3, check if the font is valid. if (aContainer.isValid() != State.VALID) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_TYPE3_DAMAGED, "The Resources dictionary of type 3 font contains invalid font")); return false; } } catch (IOException e) { throw new ValidationException("Unable to valid the Type3 : " + e.getMessage()); } } } List<ValidationError> errors = new ArrayList<ValidationError>(); ExtGStateContainer extGStates = new ExtGStateContainer(dictionary, cDoc); boolean res = extGStates.validateTransparencyRules(errors); for (ValidationError err : errors) { this.fontContainer.addError(err); } return res && validateShadingPattern(dictionary, errors); }
From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * This method checks the content of each OutputIntent. The S entry must * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile * Stream.//from w ww. j a v a 2 s. co m * * If there are more than one OutputIntent, they have to use the same ICC * Profile. * * This method returns a list of ValidationError. It is empty if no errors * have been found. * * @param handler * @return * @throws ValidationException */ public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdDocument = handler.getDocument(); PDDocumentCatalog catalog = pdDocument.getDocumentCatalog(); COSDocument cDoc = pdDocument.getDocument(); COSBase cBase = catalog.getCOSDictionary() .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS)); COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc); Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>(); for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) { COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc); if (dictionary == null) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "OutputIntent object is null or isn't a dictionary")); } else { // ---- S entry is mandatory and must be equals to GTS_PDFA1 String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S)); if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID, "The S entry of the OutputIntent isn't GTS_PDFA1")); continue; } // ---- OutputConditionIdentifier is a mandatory field String outputConditionIdentifier = dictionary .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER)); if (outputConditionIdentifier == null || "".equals(outputConditionIdentifier)) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The OutputIntentCondition is missing")); continue; } // ---- If OutputConditionIdentifier is "Custom" : // ---- DestOutputProfile and Info are mandatory // ---- DestOutputProfile must be a ICC Profile // ---- Because of PDF/A conforming file needs to specify the color // characteristics, the DestOutputProfile // is checked even if the OutputConditionIdentifier isn't "Custom" COSBase dop = dictionary .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE)); ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler); if (valer != null) { result.add(valer); continue; } if (OUTPUT_INTENT_DICTIONARY_VALUE_OUTPUT_CONDITION_IDENTIFIER_CUSTOM .equals(outputConditionIdentifier)) { String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO)); if (info == null || "".equals(info)) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The Info entry of a OutputIntent dictionary is missing")); continue; } } } } return result; }
From source file:net.sf.jabref.logic.xmp.XMPUtil.java
License:Open Source License
/** * Helper function for retrieving a BibEntry from the * PDDocumentInformation in a PDF file.//from ww w .j ava 2 s . c o m * * To understand how to get hold of a PDDocumentInformation have a look in * the test cases for XMPUtil. * * The BibEntry is build by mapping individual fields in the document * information (like author, title, keywords) to fields in a bibtex entry. * * @param di * The document information from which to build a BibEntry. * * @return The bibtex entry found in the document information. */ public static Optional<BibEntry> getBibtexEntryFromDocumentInformation(PDDocumentInformation di) { BibEntry entry = new BibEntry(); entry.setType("misc"); String s = di.getAuthor(); if (s != null) { entry.setField("author", s); } s = di.getTitle(); if (s != null) { entry.setField("title", s); } s = di.getKeywords(); if (s != null) { entry.setField("keywords", s); } s = di.getSubject(); if (s != null) { entry.setField("abstract", s); } COSDictionary dict = di.getDictionary(); for (Map.Entry<COSName, COSBase> o : dict.entrySet()) { String key = o.getKey().getName(); if (key.startsWith("bibtex/")) { String value = dict.getString(key); key = key.substring("bibtex/".length()); if ("entrytype".equals(key)) { entry.setType(value); } else { entry.setField(key, value); } } } // Return empty Optional if no values were found return entry.getFieldNames().isEmpty() ? Optional.empty() : Optional.of(entry); }
From source file:net.sf.jabref.util.XMPUtil.java
License:Open Source License
/** * Helper function for retrieving a BibtexEntry from the * PDDocumentInformation in a PDF file./*w ww.j a v a 2 s . c o m*/ * * To understand how to get hold of a PDDocumentInformation have a look in * the test cases for XMPUtil. * * The BibtexEntry is build by mapping individual fields in the document * information (like author, title, keywords) to fields in a bibtex entry. * * @param di * The document information from which to build a BibtexEntry. * * @return The bibtex entry found in the document information. */ @SuppressWarnings("unchecked") public static BibtexEntry getBibtexEntryFromDocumentInformation(PDDocumentInformation di) { BibtexEntry entry = new BibtexEntry(); String s = di.getAuthor(); if (s != null) { entry.setField("author", s); } s = di.getTitle(); if (s != null) { entry.setField("title", s); } s = di.getKeywords(); if (s != null) { entry.setField("keywords", s); } s = di.getSubject(); if (s != null) { entry.setField("abstract", s); } COSDictionary dict = di.getDictionary(); for (Map.Entry<COSName, COSBase> o : dict.entrySet()) { String key = o.getKey().getName(); if (key.startsWith("bibtex/")) { String value = dict.getString(key); key = key.substring("bibtex/".length()); if (key.equals("entrytype")) { BibtexEntryType type = BibtexEntryType.getStandardType(value); if (type != null) { entry.setType(type); } } else { entry.setField(key, value); } } } // Return null if no values were found return (!entry.getAllFields().isEmpty() ? entry : null); }
From source file:org.apache.padaf.preflight.font.Type3FontValidator.java
License:Apache License
/** * If the Resources entry is present, this method check its content. Only * fonts and Images are checked because this resource describes glyphs. REMARK * : The font and the image aren't validated because they will be validated by * an other ValidationHelper./* w ww .j ava 2s . c om*/ * * @return */ private boolean checkResources() throws ValidationException { if (this.resources == null) { // ---- No resources dictionary. return true; } COSDocument cDoc = this.handler.getDocument().getDocument(); COSDictionary dictionary = COSUtils.getAsDictionary(this.resources, cDoc); if (dictionary == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources element isn't a dictionary")); return false; } COSBase cbImg = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_XOBJECT)); COSBase cbFont = dictionary.getItem(COSName.getPDFName(DICTIONARY_KEY_FONT)); if (cbImg != null) { // ---- the referenced objects must be present in the PDF file COSDictionary dicImgs = COSUtils.getAsDictionary(cbImg, cDoc); Set<COSName> keyList = dicImgs.keySet(); for (Object key : keyList) { COSBase item = dictionary.getItem((COSName) key); COSDictionary xObjImg = COSUtils.getAsDictionary(item, cDoc); if (xObjImg == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } if (!XOBJECT_DICTIONARY_VALUE_SUBTYPE_IMG .equals(xObjImg.getString(COSName.getPDFName(DICTIONARY_KEY_SUBTYPE)))) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } } } if (cbFont != null) { // ---- the referenced object must be present in the PDF file COSDictionary dicFonts = COSUtils.getAsDictionary(cbFont, cDoc); Set<COSName> keyList = dicFonts.keySet(); for (Object key : keyList) { COSBase item = dictionary.getItem((COSName) key); COSDictionary xObjFont = COSUtils.getAsDictionary(item, cDoc); if (xObjFont == null) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } if (!FONT_DICTIONARY_VALUE_FONT .equals(xObjFont.getString(COSName.getPDFName(DICTIONARY_KEY_TYPE)))) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_DICTIONARY_INVALID, "The Resources dictionary of type 3 font is invalid")); return false; } try { PDFont aFont = PDFontFactory.createFont(xObjFont); // FontContainer aContainer = this.handler.retrieveFontContainer(aFont); AbstractFontContainer aContainer = this.handler.getFont(aFont.getCOSObject()); // ---- another font is used in the Type3, check if the font is valid. if (aContainer.isValid() != State.VALID) { this.fontContainer.addError(new ValidationError(ERROR_FONTS_TYPE3_DAMAGED, "The Resources dictionary of type 3 font contains invalid font")); return false; } } catch (IOException e) { throw new ValidationException("Unable to valid the Type3 : " + e.getMessage()); } } } List<ValidationError> errors = new ArrayList<ValidationError>(); ExtGStateContainer extGStates = new ExtGStateContainer(dictionary, cDoc); boolean res = extGStates.validateTransparencyRules(errors); for (ValidationError err : errors) { this.fontContainer.addError(err); } return res && validateShadingPattern(dictionary, errors); }
From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * This method checks the content of each OutputIntent. The S entry must * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile * Stream./*from ww w. jav a 2 s . c o m*/ * * If there are more than one OutputIntent, they have to use the same ICC * Profile. * * This method returns a list of ValidationError. It is empty if no errors * have been found. * * @param handler * @return * @throws ValidationException */ public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdDocument = handler.getDocument(); PDDocumentCatalog catalog = pdDocument.getDocumentCatalog(); COSDocument cDoc = pdDocument.getDocument(); COSBase cBase = catalog.getCOSDictionary() .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS)); COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc); Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>(); for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) { COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc); if (dictionary == null) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "OutputIntent object is null or isn't a dictionary")); } else { // ---- S entry is mandatory and must be equals to GTS_PDFA1 String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S)); if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID, "The S entry of the OutputIntent isn't GTS_PDFA1")); continue; } // ---- OutputConditionIdentifier is a mandatory field String outputConditionIdentifier = dictionary .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER)); if (outputConditionIdentifier == null) {// empty string is autorized (it may be an application specific value) result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The OutputIntentCondition is missing")); continue; } // ---- If OutputConditionIdentifier is "Custom" or a non Standard ICC Characterization : // ---- DestOutputProfile and Info are mandatory // ---- DestOutputProfile must be a ICC Profile // ---- Because of PDF/A conforming file needs to specify the color characteristics, the DestOutputProfile // ---- is checked even if the OutputConditionIdentifier isn't "Custom" COSBase dop = dictionary .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE)); ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler); if (valer != null) { result.add(valer); continue; } // TODO [LAZY] When Lazy mode will be added, this block should be uncommented to set result as warning. // if (!isStandardICCCharacterization(outputConditionIdentifier)) { // String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO)); // if (info == null || "".equals(info)) { // result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, // "The Info entry of a OutputIntent dictionary is missing")); // continue; // } // } } } return result; }
From source file:org.xstudiosys.pdfxmp.XMPUtil.java
License:Open Source License
/** * Helper function for retrieving a BibtexEntry from the * PDDocumentInformation in a PDF file.//www . j a va 2 s.c o m * * To understand how to get hold of a PDDocumentInformation have a look in * the test cases for XMPUtil. * * The BibtexEntry is build by mapping individual fields in the document * information (like author, title, keywords) to fields in a bibtex entry. * * @param di * The document information from which to build a BibtexEntry. * * @return The bibtex entry found in the document information. */ @SuppressWarnings("unchecked") public static BibtexEntry getBibtexEntryFromDocumentInformation(PDDocumentInformation di) { BibtexEntry entry = new BibtexEntry(); String s = di.getAuthor(); if (s != null) entry.setField("author", s); s = di.getTitle(); if (s != null) entry.setField("title", s); s = di.getKeywords(); if (s != null) entry.setField("keywords", s); s = di.getSubject(); if (s != null) entry.setField("abstract", s); COSDictionary dict = di.getDictionary(); for (Map.Entry<COSName, COSBase> o : dict.entrySet()) { String key = o.getKey().getName(); if (key.startsWith("bibtex/")) { String value = dict.getString(key); key = key.substring("bibtex/".length()); if (key.equals("entrytype")) { BibtexEntryType type = BibtexEntryType.getStandardType(value); if (type != null) entry.setType(type); } else entry.setField(key, value); } } // Return null if no values were found return (entry.getAllFields().size() > 0 ? entry : null); }
From source file:uk.ac.liverpool.thumbnails.PDFService.java
License:Open Source License
@Override public FontInformation[] extractFontList(URI u, File fff) throws MalformedURLException, IOException { SortedSet<FontInformation> ret = new TreeSet<FontInformation>(); PDDocument document = getPages(u, fff); List pages = document.getDocumentCatalog().getAllPages(); int i = 0;/* w w w. j ava 2s. co m*/ // The code down here is easier as it gets all the fonts used in the document. Still, this would inlcude unused fonts, so we get the fonts page by page and add them to a Hash table. for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) { if (c == null || !(c.getObject() instanceof COSDictionary)) continue; //System.out.println(c.getObject()); COSDictionary fontDictionary = (COSDictionary) c.getObject(); // System.out.println(dic.getNameAsString(COSName.BASE_FONT)); // } // } // int pagen = document.getNumberOfPages(); // i=0; // for (int p=0;p<pagen;p++){ // PDPage page = (PDPage)pages.get(p); // PDResources res = page.findResources(); // //for each page resources // if (res==null) continue; // // get the font dictionary // COSDictionary fonts = (COSDictionary) res.getCOSDictionary().getDictionaryObject( COSName.FONT ); // for( COSName fontName : fonts.keySet() ) { // COSObject font = (COSObject) fonts.getItem( fontName ); // // if the font has already been visited we ingore it // long objectId = font.getObjectNumber().longValue(); // if (ret.get(objectId)!=null) // continue; // if( font==null || ! (font.getObject() instanceof COSDictionary) ) // continue; // COSDictionary fontDictionary = (COSDictionary)font.getObject(); // Type MUSt be font if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font")) continue; // get the variables FontInformation fi = new FontInformation(); fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE); String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT); if (baseFont == null) continue; if (Arrays.binarySearch(standard14, baseFont) >= 0) continue; COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC); COSBase enc = fontDictionary.getItem(COSName.ENCODING); COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE); int firstChar = fontDictionary.getInt(COSName.FIRST_CHAR); int lastChar = fontDictionary.getInt(COSName.LAST_CHAR); String encoding; boolean toUnicode = uni != null; if (enc == null) { encoding = "standard14"; } if (enc instanceof COSString) { encoding = ((COSString) enc).getString(); } else { encoding = "table"; } fi.isSubset = false; boolean t = true; // Type one and TT can have subsets defineing the basename see 5.5.3 pdfref 1.6 // if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 || fi.fontType.equals(COSName.TRUE_TYPE.getName()) ) if (baseFont != null) { if (baseFont.length() > 6) { for (int k = 0; k < 6; k++) if (!Character.isUpperCase(baseFont.charAt(k))) t = false; if (baseFont.charAt(6) != '+') t = false; } else t = false; fi.isSubset = t; if (fi.isSubset) baseFont = baseFont.substring(7); } fi.fontFlags = 0; if (fi.fontType.equals(COSName.TYPE0) || fi.fontType.equals(COSName.TYPE3)) fi.isEmbedded = true; if (fontDescriptor != null) { // in Type1 charset indicates font is subsetted if (fontDescriptor.getItem(COSName.CHAR_SET) != null) fi.isSubset = true; if (fontDescriptor.getItem(COSName.FONT_FILE) != null || fontDescriptor.getItem(COSName.FONT_FILE3) != null || fontDescriptor.getItem(COSName.FONT_FILE2) != null) fi.isEmbedded = true; fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags")); fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY); fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH); } fi.charset = encoding; fi.fontName = baseFont; fi.isToUnicode = toUnicode; ret.add(fi); } // for all fonts // } // for all pages Iterator<FontInformation> it = ret.iterator(); FontInformation prev = null; LinkedList<FontInformation> toDelete = new LinkedList<FontInformation>(); while (it.hasNext()) { FontInformation current = it.next(); if (prev != null && prev.fontName.equals(current.fontName) && prev.fontType.startsWith("CIDFontType")) toDelete.add(current); prev = current; } ret.removeAll(toDelete); FontInformation[] retArray = ret.toArray(new FontInformation[0]); return retArray; }