List of usage examples for org.apache.pdfbox.pdmodel.PDDocumentCatalog.getLanguage()
public String getLanguage()
From source file:com.jaeksoft.searchlib.parser.PdfParser.java
License:Open Source License
/**
 * Copies the metadata of a PDF document into the parser result.
 *
 * <p>When the document information dictionary is available, its title, subject,
 * author, producer and keywords are added, followed by the creation and
 * modification dates (each only when the converted date is non-null). The page
 * count is always added; the catalog language is added when a catalog exists.
 *
 * @param result the parser result receiving the metadata fields
 * @param pdf    the PDF document to read metadata from
 * @throws IOException declared by the signature; presumably raised by the date
 *                     helpers defined elsewhere in this class (verify there)
 */
private void extractMetaData(ParserResultItem result, PDDocument pdf) throws IOException {
    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        result.addField(ParserFieldEnum.title, docInfo.getTitle());
        result.addField(ParserFieldEnum.subject, docInfo.getSubject());
        result.addField(ParserFieldEnum.author, docInfo.getAuthor());
        result.addField(ParserFieldEnum.producer, docInfo.getProducer());
        result.addField(ParserFieldEnum.keywords, docInfo.getKeywords());

        // Dates are only recorded when the conversion yields a value.
        final String created = getDate(getCreationDate(docInfo));
        if (created != null) {
            result.addField(ParserFieldEnum.creation_date, created);
        }
        final String modified = getDate(getModificationDate(docInfo));
        if (modified != null) {
            result.addField(ParserFieldEnum.modification_date, modified);
        }
    }

    result.addField(ParserFieldEnum.number_of_pages, pdf.getNumberOfPages());

    final PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
    if (docCatalog == null) {
        return;
    }
    result.addField(ParserFieldEnum.language, docCatalog.getLanguage());
}
From source file:com.opensearchserver.extractor.parser.PdfBox.java
License:Apache License
private void extractMetaData(PDDocument pdf) throws IOException { PDDocumentInformation info = pdf.getDocumentInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(SUBJECT, info.getSubject()); metas.add(AUTHOR, info.getAuthor()); metas.add(PRODUCER, info.getProducer()); metas.add(KEYWORDS, info.getKeywords()); metas.add(CREATION_DATE, getDate(getCreationDate(info))); metas.add(MODIFICATION_DATE, getModificationDate(info)); }// w w w . j av a 2 s . c o m int pages = pdf.getNumberOfPages(); metas.add(NUMBER_OF_PAGES, pages); PDDocumentCatalog catalog = pdf.getDocumentCatalog(); if (catalog != null) metas.add(LANGUAGE, catalog.getLanguage()); }
From source file:com.qwazr.library.pdfbox.PdfBoxParser.java
License:Apache License
private void extractMetaData(final PDDocument pdf, final ParserFieldsBuilder metas) { metas.set(MIME_TYPE, DEFAULT_MIMETYPES[0]); final PDDocumentInformation info = pdf.getDocumentInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(SUBJECT, info.getSubject()); metas.add(AUTHOR, info.getAuthor()); metas.add(PRODUCER, info.getProducer()); metas.add(KEYWORDS, info.getKeywords()); metas.add(CREATION_DATE, info.getCreationDate()); metas.add(MODIFICATION_DATE, info.getModificationDate()); }//from w ww . j ava 2 s . co m int pages = pdf.getNumberOfPages(); metas.add(NUMBER_OF_PAGES, pages); PDDocumentCatalog catalog = pdf.getDocumentCatalog(); if (catalog != null) metas.add(LANGUAGE, catalog.getLanguage()); }
From source file:net.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * The Lang element is optional but it is recommended. This method check the * Syntax of the Lang if this entry is present. * /*from w w w . jav a 2 s . com*/ * @param handler * @param catalog * @param result * @throws ValidationException */ protected void validateLang(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result) throws ValidationException { String lang = catalog.getLanguage(); if (lang != null && !lang.matches("[A-Za-z]{1,8}(-[A-Za-z]{1,8})*")) { result.add(new ValidationError(ERROR_SYNTAX_LANG_NOT_RFC1766)); } }
From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * The Lang element is optional but it is recommended. This method check the * Syntax of the Lang if this entry is present. * /*w ww . j ava2 s . com*/ * @param handler * @param catalog * @param result * @throws ValidationException */ protected void validateLang(DocumentHandler handler, PDDocumentCatalog catalog, List<ValidationError> result) throws ValidationException { String lang = catalog.getLanguage(); if (lang != null && !"".equals(lang) && !lang.matches("[A-Za-z]{1,8}(-[A-Za-z]{1,8})*")) { result.add(new ValidationError(ERROR_SYNTAX_LANG_NOT_RFC1766)); } }