Example usage for org.apache.pdfbox.pdmodel PDDocumentCatalog getLanguage

List of usage examples for org.apache.pdfbox.pdmodel PDDocumentCatalog getLanguage

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocumentCatalog.getLanguage().

Prototype

public String getLanguage() 

Source Link

Document

Returns the language for the document, or null.

Usage

From source file:com.jaeksoft.searchlib.parser.PdfParser.java

License:Open Source License

/**
 * Copies document-level PDF metadata into the given parser result.
 *
 * <p>When the document information dictionary is present, its title, subject,
 * author, producer and keywords are added, followed by the creation and
 * modification dates (each only when {@code getDate} yields a non-null value).
 * The page count is always added, and the catalog language is added whenever
 * the document exposes a catalog.
 *
 * @param result the item that receives the extracted fields
 * @param pdf    the document whose metadata is read
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private void extractMetaData(ParserResultItem result, PDDocument pdf) throws IOException {
    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        result.addField(ParserFieldEnum.title, docInfo.getTitle());
        result.addField(ParserFieldEnum.subject, docInfo.getSubject());
        result.addField(ParserFieldEnum.author, docInfo.getAuthor());
        result.addField(ParserFieldEnum.producer, docInfo.getProducer());
        result.addField(ParserFieldEnum.keywords, docInfo.getKeywords());

        // Dates are only recorded when they could be converted to text.
        final String created = getDate(getCreationDate(docInfo));
        if (created != null) {
            result.addField(ParserFieldEnum.creation_date, created);
        }
        final String modified = getDate(getModificationDate(docInfo));
        if (modified != null) {
            result.addField(ParserFieldEnum.modification_date, modified);
        }
    }

    result.addField(ParserFieldEnum.number_of_pages, pdf.getNumberOfPages());

    final PDDocumentCatalog documentCatalog = pdf.getDocumentCatalog();
    if (documentCatalog != null) {
        result.addField(ParserFieldEnum.language, documentCatalog.getLanguage());
    }
}

From source file:com.opensearchserver.extractor.parser.PdfBox.java

License:Apache License

/**
 * Collects document-level PDF metadata into {@code metas}.
 *
 * <p>When the document information dictionary is present, its title, subject,
 * author, producer, keywords and both dates are added. The page count is
 * always added, and the catalog language is added whenever the document
 * exposes a catalog.
 *
 * @param pdf the document whose metadata is read
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private void extractMetaData(PDDocument pdf) throws IOException {
    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        metas.add(TITLE, docInfo.getTitle());
        metas.add(SUBJECT, docInfo.getSubject());
        metas.add(AUTHOR, docInfo.getAuthor());
        metas.add(PRODUCER, docInfo.getProducer());
        metas.add(KEYWORDS, docInfo.getKeywords());
        metas.add(CREATION_DATE, getDate(getCreationDate(docInfo)));
        // NOTE(review): unlike the creation date, the modification date is not
        // passed through getDate(...) - confirm whether this is intentional.
        metas.add(MODIFICATION_DATE, getModificationDate(docInfo));
    }

    metas.add(NUMBER_OF_PAGES, pdf.getNumberOfPages());

    final PDDocumentCatalog documentCatalog = pdf.getDocumentCatalog();
    if (documentCatalog != null) {
        metas.add(LANGUAGE, documentCatalog.getLanguage());
    }
}

From source file:com.qwazr.library.pdfbox.PdfBoxParser.java

License:Apache License

/**
 * Fills the given builder with document-level PDF metadata.
 *
 * <p>The MIME type is set first, using the first entry of
 * {@code DEFAULT_MIMETYPES}. When the document information dictionary is
 * present, its title, subject, author, producer, keywords and both dates are
 * added as returned by the dictionary. The page count is always added, and the
 * catalog language is added whenever the document exposes a catalog.
 *
 * @param pdf   the document whose metadata is read
 * @param metas the builder that receives the extracted fields
 */
private void extractMetaData(final PDDocument pdf, final ParserFieldsBuilder metas) {
    metas.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        metas.add(TITLE, docInfo.getTitle());
        metas.add(SUBJECT, docInfo.getSubject());
        metas.add(AUTHOR, docInfo.getAuthor());
        metas.add(PRODUCER, docInfo.getProducer());
        metas.add(KEYWORDS, docInfo.getKeywords());
        metas.add(CREATION_DATE, docInfo.getCreationDate());
        metas.add(MODIFICATION_DATE, docInfo.getModificationDate());
    }

    metas.add(NUMBER_OF_PAGES, pdf.getNumberOfPages());

    final PDDocumentCatalog documentCatalog = pdf.getDocumentCatalog();
    if (documentCatalog != null) {
        metas.add(LANGUAGE, documentCatalog.getLanguage());
    }
}

From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * The Lang element is optional but it is recommended. This method check the
 * Syntax of the Lang if this entry is present.
 * /*from   w w  w .  jav  a 2 s . com*/
 * @param handler
 * @param catalog
 * @param result
 * @throws ValidationException
 */
protected void validateLang(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result)
        throws ValidationException {
    String lang = catalog.getLanguage();
    if (lang != null && !lang.matches("[A-Za-z]{1,8}(-[A-Za-z]{1,8})*")) {
        result.add(new ValidationError(ERROR_SYNTAX_LANG_NOT_RFC1766));
    }
}

From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * The Lang element is optional but it is recommended. This method check the
 * Syntax of the Lang if this entry is present.
 * /*w  ww .  j ava2 s  . com*/
 * @param handler
 * @param catalog
 * @param result
 * @throws ValidationException
 */
protected void validateLang(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result)
        throws ValidationException {
    String lang = catalog.getLanguage();
    if (lang != null && !"".equals(lang) && !lang.matches("[A-Za-z]{1,8}(-[A-Za-z]{1,8})*")) {
        result.add(new ValidationError(ERROR_SYNTAX_LANG_NOT_RFC1766));
    }
}