Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog.

Prototype

public PDDocumentCatalog getDocumentCatalog()

Source Link

Document

This will get the document CATALOG.

Usage

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

private void extractAcroForm(PDDocument pdf, XHTMLContentHandler handler) throws IOException, SAXException {
    // Thank you, Ben Litchfield, for
    // org.apache.pdfbox.examples.fdf.PrintFields
    // this code derives from Ben's code
    PDDocumentCatalog catalog = pdf.getDocumentCatalog();

    if (catalog == null)
        return;//from w ww. j  av  a 2s . co m

    PDAcroForm form = catalog.getAcroForm();
    if (form == null)
        return;

    @SuppressWarnings("rawtypes")
    List fields = form.getFields();

    if (fields == null)
        return;

    @SuppressWarnings("rawtypes")
    ListIterator itr = fields.listIterator();

    if (itr == null)
        return;

    handler.startElement("div", "class", "acroform");
    handler.startElement("ol");
    while (itr.hasNext()) {
        Object obj = itr.next();
        if (obj != null && obj instanceof PDField) {
            processAcroField((PDField) obj, handler, 0);
        }
    }
    handler.endElement("ol");
    handler.endElement("div");
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 *
 *
 * @param pdf//w  ww  .  java 2  s  . co  m
 *
 * @throws SAXException
 */
private void extractImageText(PDDocument pdf) {
    List<?> pages = pdf.getDocumentCatalog().getAllPages();
    Iterator<?> pageIterator = pages.iterator();
    pageCount = pages.size();
    imageCount = 0;
    int currentPage = 0;
    TikaImageHelper helper = new TikaImageHelper(this.metadata);
    try {
        while (pageIterator.hasNext()) {
            PDPage page = (PDPage) pageIterator.next();
            PDResources resources = page.getResources();
            processResources(resources, helper);
            helper.addTextToHandler(handler, ++currentPage, pageCount);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (helper != null) {
            helper.close();
        }
    }
}

From source file:modules.PDFFontDependencyExtractorModule.java

License:Apache License

public PDFFontResults extractFontList(File f) throws IOException, InvalidParameterException {
    PDDocument document;
    try {//ww  w . ja va2s . c om
        document = PDDocument.load(f);
    } catch (IOException x) {
        throw new InvalidParameterException("Not a PDF file");
    }
    SortedSet<FontInformation> ret = new TreeSet<FontInformation>(new Comparator<FontInformation>() {

        @Override
        public int compare(FontInformation o1, FontInformation o2) {
            int a = o1.fontName.compareTo(o2.fontName);
            if (a != 0)
                return a;
            else
                return o1.fontType.compareTo(o2.fontType);
        }

    });

    document.getDocumentCatalog().getAllPages();
    // The code down here is easier as it gets all the fonts used in the
    // document. Still, this would inlcude unused fonts, so we get the fonts
    // page by page and add them to a Hash table.
    for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) {
        if (c == null || !(c.getObject() instanceof COSDictionary)) {
            continue;
            // System.out.println(c.getObject());
        }

        COSDictionary fontDictionary = (COSDictionary) c.getObject();
        // System.out.println(dic.getNameAsString(COSName.BASE_FONT));
        // }
        // }
        // int pagen = document.getNumberOfPages();
        // i=0;
        // for (int p=0;p<pagen;p++){
        // PDPage page = (PDPage)pages.get(p);
        // PDResources res = page.findResources();
        // //for each page resources
        // if (res==null) continue;
        // // get the font dictionary
        // COSDictionary fonts = (COSDictionary)
        // res.getCOSDictionary().getDictionaryObject( COSName.FONT );
        // for( COSName fontName : fonts.keySet() ) {
        // COSObject font = (COSObject) fonts.getItem( fontName );
        // // if the font has already been visited we ingore it
        // long objectId = font.getObjectNumber().longValue();
        // if (ret.get(objectId)!=null)
        // continue;
        // if( font==null || ! (font.getObject() instanceof COSDictionary) )
        // continue;
        // COSDictionary fontDictionary = (COSDictionary)font.getObject();

        // Type MUSt be font
        if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font")) {
            continue;
        }
        // get the variables
        FontInformation fi = new FontInformation();
        fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE);

        String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT);
        if (baseFont == null) {
            continue;
        }
        if (Arrays.binarySearch(standard14, baseFont) >= 0) {
            continue;
        }
        COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC);
        COSBase enc = fontDictionary.getItem(COSName.ENCODING);
        COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE);
        fontDictionary.getInt(COSName.FIRST_CHAR);
        fontDictionary.getInt(COSName.LAST_CHAR);
        String encoding;
        boolean toUnicode = uni != null;
        if (enc == null) {
            encoding = "standard14";
        }
        if (enc instanceof COSString) {
            encoding = ((COSString) enc).getString();
        } else {
            encoding = "table";
        }
        fi.isSubset = false;
        boolean t = true;
        // Type one and TT can have subsets defineing the basename see 5.5.3
        // pdfref 1.6
        // if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 ||
        // fi.fontType.equals(COSName.TRUE_TYPE.getName()) )
        if (baseFont != null) {
            if (baseFont.length() > 6) {
                for (int k = 0; k < 6; k++)
                    if (!Character.isUpperCase(baseFont.charAt(k))) {
                        t = false;
                    }
                if (baseFont.charAt(6) != '+') {
                    t = false;
                }
            } else {
                t = false;
            }
            fi.isSubset = t;
            if (fi.isSubset) {
                fi.baseName = baseFont.substring(0, 6);
                baseFont = baseFont.substring(7);
            }
        }
        fi.fontFlags = 0;
        if (fi.fontType.equals(COSName.TYPE0.getName()) || fi.fontType.equals(COSName.TYPE3.getName())) {
            fi.isEmbedded = true;
        }

        if (fontDescriptor != null) {
            // in Type1 charset indicates font is subsetted
            if (fontDescriptor.getItem(COSName.CHAR_SET) != null) {
                fi.isSubset = true;
            }
            if (fontDescriptor.getItem(COSName.FONT_FILE) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE3) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE2) != null) {
                fi.isEmbedded = true;
            }
            fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags"));
            fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY);
            fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH);

        }
        fi.charset = encoding;
        fi.fontName = baseFont;
        fi.isToUnicode = toUnicode;
        fi.encoding = fontDictionary.getNameAsString(COSName.CID_TO_GID_MAP);

        ret.add(fi);

    } // for all fonts

    HashMultimap<String, FontInformation> m = HashMultimap.create();

    for (FontInformation ff : ret) {
        m.put(ff.fontName, ff);
    }
    LinkedList<FontInformation> missing = new LinkedList<FontInformation>();
    Set<String> k = m.keySet();
    for (String kk : k) {
        Set<FontInformation> s = m.get(kk);
        if (s.size() < 1) {
            continue;
        }
        if (s.size() > 1) {
            boolean found = false;
            FontInformation ff = null;
            for (FontInformation fonti : s) {
                if (!fonti.isEmbedded) {
                    ff = fonti;
                } else {
                    found = true;
                }
            }
            if (!found) {
                missing.add(ff);
            }
        } else {
            FontInformation ff = s.iterator().next();
            if (!ff.isEmbedded) {
                missing.add(ff);
            }
        }

    }

    // } // for all pages
    // Iterator<FontInformation> it = ret.iterator();
    // FontInformation prev = null;
    // LinkedList<FontInformation> toDelete = new
    // LinkedList<FontInformation>();
    // while (it.hasNext()) {
    // FontInformation current = it.next();
    //
    // if (prev!= null && prev.fontName.equals(current.fontName) &&
    // (prev.fontType.startsWith("CIDFontType") ||
    // current.fontType.startsWith("CIDFontType")))
    // toDelete.add(current);
    // prev = current;
    // }
    //
    // //ret.removeAll(toDelete);
    // FontInformation[] retArray =toDelete.toArray(new FontInformation[0]);
    //

    if (missing.size() == 0) {
        missing = null;
    } else {
        System.out.println("Found missing fonts: " + f);
        System.out.println(missing);
    }
    return new PDFFontResults(new LinkedList<FontInformation>(ret), missing);
}

From source file:name.marcelomorales.siqisiqi.pdfbox.CoordinatesGenerator.java

License:Apache License

public void generarPdf(OutputStream os, String template, Map<String, Object> m, String path, String coordenates,
        float fontSize, float ancho) throws IOException {
    long t = System.currentTimeMillis();
    PDDocument doc = null;
    try {/*from  w  w  w  .j  av a2s .c o m*/
        doc = PDDocument.load(new File(path));

        List pages = doc.getDocumentCatalog().getAllPages();

        PDPage sourcePage = (PDPage) pages.get(0);

        boolean append = sourcePage.getContents() != null;
        PDPageContentStream contentStream = new PDPageContentStream(doc, sourcePage, append, true);

        StringReader fileReader = null;
        try {

            fileReader = new StringReader(template);
            List<String> list = CharStreams.readLines(fileReader);
            boolean textHasBegun = false;
            float currentOffset = 0f;
            for (String line : list) {

                if (line == null) {
                    continue;
                }

                if (line.startsWith("#")) {
                    continue;
                }

                final Iterable<String> str = Splitter.on(',').omitEmptyStrings().trimResults().split(line);
                final String[] split = Iterables.toArray(str, String.class);
                if (split == null || split.length < 4) {
                    continue;
                }

                if (Character.isDigit(split[0].charAt(0))) {
                    if (textHasBegun) {
                        contentStream.endText();
                    }
                    contentStream.beginText();
                    textHasBegun = true;
                    contentStream.moveTextPositionByAmount(parseFloat(split[0]), parseFloat(split[1]));
                } else {
                    contentStream.moveTextPositionByAmount(currentOffset, 0);
                }

                if (!textHasBegun) {
                    LOGGER.warn("Hay un posible mal uso de un .ree", new Throwable());
                    contentStream.beginText();
                    textHasBegun = true;
                }

                PDType1Font font;
                if ("b".equals(split[2])) {
                    font = HELVETICA_BOLD;
                } else {
                    font = HELVETICA;
                }
                contentStream.setFont(font, fontSize);

                Object text = null;
                if (split[3].startsWith("\"")) {
                    // TODO: text = substring(split[3], 1, -1);
                } else {
                    // TODO: text = new PropertyModel(m, split[3]).getObject();
                }

                if (text == null) {
                    LOGGER.warn("Propiedad {} no se encuentra", split[3]);
                    //contentStream.drawString("ERROR: propiedad no encontrada");
                    contentStream.drawString(" ");
                } else {
                    String string = text.toString();
                    currentOffset = font.getStringWidth(string) * ancho;
                    contentStream.drawString(string);
                }
            }

            if (textHasBegun) {
                contentStream.endText();
            }
        } finally {
            Closeables.closeQuietly(fileReader);
        }

        contentStream.close();

        try {
            doc.save(os);
        } catch (COSVisitorException e) {
            throw new IOException("Ha ocurrido un error al escribir en el Os", e);
        }
    } finally {
        if (doc != null) {
            doc.close();
        }
        LOGGER.info("Me ha tomado {} milisegundos hacer el pdf", System.currentTimeMillis() - t);
    }
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

@Override
public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {
    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdfbox = handler.getDocument();
    PDDocumentCatalog catalog = pdfbox.getDocumentCatalog();
    if (catalog != null) {
        validateActions(handler, catalog, result);
        validateLang(handler, catalog, result);
        validateNames(handler, catalog, result);
        validateOCProperties(handler, catalog, result);
    } else {/*w w  w .j  av a  2 s .  com*/
        throw new ValidationException("There are no Catalog entry in the Document.");
    }

    // ---- Check OutputIntent to know the ICC Profile
    result.addAll(validateOutputIntent(handler));

    return result;
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * This method checks the content of each OutputIntent. The S entry must
 * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile
 * Stream./*from   w w  w.  j a v a 2 s.  com*/
 * 
 * If there are more than one OutputIntent, they have to use the same ICC
 * Profile.
 * 
 * This method returns a list of ValidationError. It is empty if no errors
 * have been found.
 * 
 * @param handler
 * @return
 * @throws ValidationException
 */
public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException {
    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdDocument = handler.getDocument();
    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
    COSDocument cDoc = pdDocument.getDocument();

    COSBase cBase = catalog.getCOSDictionary()
            .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS));
    COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc);

    Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>();

    for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) {
        COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc);

        if (dictionary == null) {

            result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                    "OutputIntent object is null or isn't a dictionary"));

        } else {
            // ---- S entry is mandatory and must be equals to GTS_PDFA1
            String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S));
            if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID,
                        "The S entry of the OutputIntent isn't GTS_PDFA1"));
                continue;
            }

            // ---- OutputConditionIdentifier is a mandatory field
            String outputConditionIdentifier = dictionary
                    .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER));
            if (outputConditionIdentifier == null || "".equals(outputConditionIdentifier)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                        "The OutputIntentCondition is missing"));
                continue;
            }

            // ---- If OutputConditionIdentifier is "Custom" :
            // ---- DestOutputProfile and Info are mandatory
            // ---- DestOutputProfile must be a ICC Profile

            // ---- Because of PDF/A conforming file needs to specify the color
            // characteristics, the DestOutputProfile
            // is checked even if the OutputConditionIdentifier isn't "Custom"
            COSBase dop = dictionary
                    .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE));
            ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler);
            if (valer != null) {
                result.add(valer);
                continue;
            }

            if (OUTPUT_INTENT_DICTIONARY_VALUE_OUTPUT_CONDITION_IDENTIFIER_CUSTOM
                    .equals(outputConditionIdentifier)) {
                String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO));
                if (info == null || "".equals(info)) {
                    result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                            "The Info entry of a OutputIntent dictionary is missing"));
                    continue;
                }
            }
        }
    }
    return result;
}

From source file:net.padaf.preflight.helpers.MetadataValidationHelper.java

License:Apache License

/**
 * Check if metadata dictionary has no stream filter
 * /*ww  w  . j av a2 s.  co m*/
 * @param doc
 * @return
 */
protected List<ValidationError> checkStreamFilterUsage(PDDocument doc) {
    List<ValidationError> ve = new ArrayList<ValidationError>();
    List<?> filters = doc.getDocumentCatalog().getMetadata().getFilters();
    if (filters != null && !filters.isEmpty()) {
        ve.add(new ValidationError(ValidationConstants.ERROR_METADATA_MAIN,
                "Using stream filter on metadata dictionary is forbidden"));
    }
    return ve;
}

From source file:net.sf.jabref.logic.xmp.XMPUtil.java

License:Open Source License

/**
 * @return empty Optional if no metadata has been found
 *///from   w ww .ja va  2  s .co  m
private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException {
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata metaRaw = catalog.getMetadata();

    if (metaRaw == null) {
        return Optional.empty();
    }

    Document parseResult;
    try (InputStream is = metaRaw.createInputStream()) {
        parseResult = XMLUtil.parse(is);
    }
    XMPMetadata meta = new XMPMetadata(parseResult);
    meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE, XMPSchemaBibtex.class);
    return Optional.of(meta);
}

From source file:net.sf.jabref.logic.xmp.XMPUtil.java

License:Open Source License

/**
 * Try to write the given BibTexEntries as DublinCore XMP Schemas
 *
 * Existing DublinCore schemas in the document are removed
 *
 * @param document/*from  ww  w  . ja  v  a2s . c o m*/
 *            The pdf document to write to.
 * @param entries
 *            The BibTeX entries that are written as schemas
 * @param database
 *            maybenull An optional database which the given BibTeX entries
 *            belong to, which will be used to resolve strings. If the
 *            database is null the strings will not be resolved.
 * @throws IOException
 * @throws TransformerException
 */
private static void writeDublinCore(PDDocument document, Collection<BibEntry> entries, BibDatabase database)
        throws IOException, TransformerException {

    Collection<BibEntry> resolvedEntries;
    if (database == null) {
        resolvedEntries = entries;
    } else {
        resolvedEntries = database.resolveForStrings(entries, false);
    }

    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata metaRaw = catalog.getMetadata();

    XMPMetadata meta;
    if (metaRaw == null) {
        meta = new XMPMetadata();
    } else {
        meta = new XMPMetadata(XMLUtil.parse(metaRaw.createInputStream()));
    }

    // Remove all current Dublin-Core schemas
    List<XMPSchema> schemas = meta.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
    for (XMPSchema schema : schemas) {
        schema.getElement().getParentNode().removeChild(schema.getElement());
    }

    for (BibEntry entry : resolvedEntries) {
        XMPSchemaDublinCore dcSchema = new XMPSchemaDublinCore(meta);
        XMPUtil.writeToDCSchema(dcSchema, entry, null);
        meta.addSchema(dcSchema);
    }

    // Save to stream and then input that stream to the PDF
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    meta.save(os);
    ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
    PDMetadata metadataStream = new PDMetadata(document, is, false);
    catalog.setMetadata(metadataStream);
}

From source file:net.sf.jabref.PdfPreviewPanel.java

License:Open Source License

private void renderPDFFile(File file) {
    InputStream input;//from   www .  j a v  a 2s. c  o m
    try {
        input = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        return;
    }

    PDDocument document;
    try {
        document = PDDocument.load(input);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        return;
    }
    @SuppressWarnings("unchecked")
    List<PDPage> pages = document.getDocumentCatalog().getAllPages();

    PDPage page = pages.get(0);
    BufferedImage image;
    try {
        image = page.convertToImage();
    } catch (Exception e1) {
        // silently ignores all rendering exceptions
        image = null;
    }

    if (image != null) {
        int width = this.getParent().getWidth();
        int height = this.getParent().getHeight();
        BufferedImage resImage = resizeImage(image, width, height, BufferedImage.TYPE_INT_RGB);
        ImageIcon icon = new ImageIcon(resImage);
        picLabel.setText(null);
        picLabel.setIcon(icon);
    } else {
        clearPreview();
    }

    try {
        document.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}