Usage examples for org.apache.pdfbox.pdmodel.PDDocument.load
public static PDDocument load(byte[] input) throws IOException
From source file:org.mabb.fontverter.opentype.TtfInstructions.TestFullTtfPrograms.java
License:Open Source License
@Test public void executeSecondGlyphIn_BrokenHelveticaNeueTtf() throws Exception { PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf")); PDFont rawType0Font = extractFont(doc, "TCQDAA+HelveticaNeue-Light-Identity-H"); OpenTypeFont font = (OpenTypeFont) PdfFontExtractor.convertType0FontToOpenType((PDType0Font) rawType0Font); List<TtfGlyph> glyphs = font.getGlyfTable().getNonEmptyGlyphs(); TtfGlyph glyph = glyphs.get(1);//from www . j a va2 s. c om List<TtfInstruction> instructions = glyph.getInstructions(); TtfVirtualMachine vm = new TtfVirtualMachine(font); vm.execute(instructions); }
From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java
License:Open Source License
private static void extractPdfFonts(String extractPath, File pdfFile, FontFormat format) throws IOException { File fontExtractDir = new File(extractPath); if (!fontExtractDir.exists()) fontExtractDir.mkdir();//ww w .ja v a 2s .c o m PDDocument pdf = PDDocument.load(pdfFile); PdfFontExtractor fontExtractor = new PdfFontExtractor(); fontExtractor.setExtractFormat(format); fontExtractor.extractFontsToDir(pdf, extractPath); pdf.close(); }
From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java
License:Open Source License
/**
 * Loads the given PDF file and extracts its fonts into a directory.
 *
 * @param pdf  PDF file to read
 * @param path directory path handed to the document-based overload
 * @throws IOException if the PDF cannot be loaded or extraction fails
 */
public void extractFontsToDir(File pdf, String path) throws IOException {
    PDDocument doc = PDDocument.load(pdf);
    try {
        extractFontsToDir(doc, path);
    } finally {
        // Close the document even when extraction throws.
        doc.close();
    }
}
From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java
License:Open Source License
/**
 * Loads a PDF from its raw bytes and extracts its fonts into a directory.
 *
 * @param pdf  complete PDF content
 * @param path directory path handed to the document-based overload
 * @throws IOException if the PDF cannot be parsed or extraction fails
 */
public void extractFontsToDir(byte[] pdf, String path) throws IOException {
    PDDocument doc = PDDocument.load(pdf);
    try {
        extractFontsToDir(doc, path);
    } finally {
        // Close the document even when extraction throws.
        doc.close();
    }
}
From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java
License:Open Source License
/**
 * Extracts the fonts of {@code pdf/brno30.pdf} as {@code FVFont} objects and
 * checks how many were produced.
 *
 * NOTE(review): the method name mentions two fonts while the assertion
 * expects three; confirm which is intended.
 *
 * @throws IOException if the test PDF cannot be read or extraction fails
 */
@Test
public void givenPdfWith2Fonts_extractFontsToFVFontList_thenListHasSameNumberOfFonts() throws IOException {
    PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"));
    try {
        PdfFontExtractor extractor = new PdfFontExtractor();
        List<FVFont> fonts = extractor.extractToFVFonts(doc);

        Assert.assertEquals(3, fonts.size());
    } finally {
        // Close the document even when the assertion fails.
        doc.close();
    }
}
From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java
License:Open Source License
@Test public void givenPdfWith2Fonts_extractFontsToDir_thenDirectoryHasThreeTtfFiles() throws IOException { PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf")); PdfFontExtractor extractor = new PdfFontExtractor(); File extractDir = folder.getRoot(); extractor.extractFontsToDir(doc, extractDir); File[] fontFiles = extractDir.listFiles(); Assert.assertEquals(3, fontFiles.length); for (File fileOn : fontFiles) Assert.assertEquals("ttf", FilenameUtils.getExtension(fileOn.getPath())); doc.close();//from ww w.j a va 2s . c o m }
From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java
License:Open Source License
@Test public void givenPdfWith2Fonts_extractFontsToDirWithWoff1FormatSet_thenDirectoryHasThreeWoffFiles() throws IOException { File extractDir = folder.getRoot(); PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf")); PdfFontExtractor extractor = new PdfFontExtractor(); extractor.setExtractFormat(FontVerter.FontFormat.WOFF1); extractor.extractFontsToDir(doc, extractDir); File[] fontFiles = extractDir.listFiles(); Assert.assertEquals(3, fontFiles.length); for (File fileOn : fontFiles) Assert.assertEquals("woff", FilenameUtils.getExtension(fileOn.getPath())); doc.close();//from ww w . j av a 2 s .c om }
From source file:org.mabb.fontverter.pdf.TestType0ToOpenTypeConverter.java
License:Open Source License
/**
 * Loads {@code pdf/HorariosMadrid_Segovia.pdf} from the test resources and
 * keeps the parsed document in {@code doc}.
 *
 * @throws IOException if the test PDF cannot be read or parsed
 */
public TestType0ToOpenTypeConverter() throws IOException {
    this.doc = PDDocument.load(
            TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf"));
}
From source file:org.MagicBeans.latexFileType.PDFGenerator.java
/**
 * Returns one page of the temporary PDF resolved from {@code workingDir}.
 *
 * The path is stored in {@code PDF_PATH}; when the file exists, the loaded
 * document is stored in {@code inputPDF}.
 *
 * @param number 1-based page number
 * @return the requested page, or {@code null} when the file does not exist,
 *         the document has no pages, {@code number} is out of range, or
 *         loading fails with an {@link IOException}
 */
public PDPage getPDFPage(int number) {
    PDF_PATH = ApplicationUtils.getTempPDFFile(workingDir);
    try {
        File source = new File(PDF_PATH);
        if (!source.exists()) {
            return null;
        }

        inputPDF = PDDocument.load(source);
        List<PDPage> pages = inputPDF.getDocumentCatalog().getAllPages();
        if (pages == null || pages.isEmpty()) {
            return null;
        }
        if (number <= 0 || number > pages.size()) {
            return null;
        }
        // Callers count pages from 1; the list is 0-based.
        return pages.get(number - 1);
    } catch (IOException ex) {
        return null;
    }
}
From source file:org.mitre.xtext.converters.PDFConverter.java
License:Apache License
/** Implementation is informed by PDFBox authors. *///from ww w . j av a 2s . com @Override public synchronized ConvertedDocument convert(java.io.File doc) throws IOException { /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Adapted from LucenePDFDocument.java from PDFBox lucene project * * This class is used to create a document for the lucene search engine. * This should easily plug into the IndexHTML or IndexFiles that comes with * the lucene project. This class will populate the following fields. 
* <table> <tr> <th>Lucene Field Name</th> <th>Description</th> </tr> <tr> * <td>path</td> <td>File system path if loaded from a file</td> </tr> <tr> * <td>url</td> <td>URL to PDF document</td> </tr> <tr> <td>contents</td> * <td>Entire contents of PDF document, indexed but not stored</td> </tr> * <tr> <td>summary</td> <td>First 500 characters of content</td> </tr> <tr> * <td>modified</td> <td>The modified date/time according to the url or * path</td> </tr> <tr> <td>uid</td> <td>A unique identifier for the Lucene * document.</td> </tr> <tr> <td>CreationDate</td> <td>From PDF meta-data if * available</td> </tr> <tr> <td>Creator</td> <td>From PDF meta-data if * available</td> </tr> <tr> <td>Keywords</td> <td>From PDF meta-data if * available</td> </tr> <tr> <td>ModificationDate</td> <td>From PDF * meta-data if available</td> </tr> <tr> <td>Producer</td> <td>From PDF * meta-data if available</td> </tr> <tr> <td>Subject</td> <td>From PDF * meta-data if available</td> </tr> <tr> <td>Trapped</td> <td>From PDF * meta-data if available</td> </tr> <tr> <td>Encrypted</td> <td>From PDF * meta-data if available</td> </tr> </table> * * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a> * @version $Revision: 1.23 $ * * @throws IOException If there is an error parsing the document. */ PDDocument pdfDocument = null; ConvertedDocument textdoc = new ConvertedDocument(doc); try { pdfDocument = PDDocument.load(doc); if (pdfDocument.isEncrypted()) { //Just try using the default password and move on /** * * Exception in thread "main" java.lang.NoClassDefFoundError: * org/bouncycastle/jce/provider/BouncyCastleProvider at * org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1090) * at * org.apache.pdfbox.pdmodel.PDDocument.decrypt(PDDocument.java:594) * * CRYPTO stuff -- load BouncyCastle crypto JAR files. 
try { * pdfDocument.decrypt(""); } catch (CryptographyException e) { * throw new IOException("Error decrypting document(" + pdf_file * + "): " + e); } catch (InvalidPasswordException e) { //they * didn't suppply a password and the default of "" was wrong. * throw new IOException( "Error: The document(" + pdf_file + ") * is encrypted "); } finally { if (pdfDocument != null) { * pdfDocument.close();} } */ textdoc.addProperty("encrypted", "YES"); } else { //create a writer where to append the text content. StringWriter writer = new StringWriter(); stripper.resetEngine(); stripper.writeText(pdfDocument, writer); PDDocumentInformation info = pdfDocument.getDocumentInformation(); if (info != null) { textdoc.addAuthor(info.getAuthor()); try { textdoc.addCreateDate(info.getCreationDate()); } catch (IOException io) { //ignore, bad date but continue with indexing } textdoc.addProperty("creator_tool", info.getCreator()); textdoc.addProperty("keywords", info.getKeywords()); /* try { metadata.add("ModificationDate", info.getModificationDate()); } catch (IOException io) { //ignore, bad date but continue with indexing } */ //metadata.add("Producer", info.getProducer()); textdoc.addProperty("subject", info.getSubject()); String ttl = info.getTitle(); if (ttl == null || "untitled".equalsIgnoreCase(ttl)) { ttl = textdoc.filename; } textdoc.addTitle(ttl); // metadata.add("Trapped", info.getTrapped()); // TODO: Character set is what? textdoc.setEncoding("UTF-8"); } // Note: the buffer to string operation is costless; // the char array value of the writer buffer and the content string // is shared as long as the buffer content is not modified, which will // not occur here. textdoc.setPayload(writer.getBuffer().toString()); } return textdoc; } finally { if (pdfDocument != null) { pdfDocument.close(); } } }