List of usage examples for org.apache.pdfbox.cos.COSDocument.getTrailer()
public COSDictionary getTrailer()
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/** * This will write the body of the document. * * @param doc The document to write the body for. * * @throws IOException If there is an error writing the data. * @throws COSVisitorException If there is an error generating the data. *///from w w w. j a v a 2 s . c o m protected void doWriteBody(COSDocument doc) throws IOException, COSVisitorException { COSDictionary trailer = doc.getTrailer(); COSDictionary root = (COSDictionary) trailer.getDictionaryObject(COSName.ROOT); COSDictionary info = (COSDictionary) trailer.getDictionaryObject(COSName.INFO); COSDictionary encrypt = (COSDictionary) trailer.getDictionaryObject(COSName.ENCRYPT); if (root != null) { addObjectToWrite(root); } if (info != null) { addObjectToWrite(info); } while (objectsToWrite.size() > 0) { COSBase nextObject = objectsToWrite.removeFirst(); objectsToWriteSet.remove(nextObject); doWriteObject(nextObject); } willEncrypt = false; if (encrypt != null) { addObjectToWrite(encrypt); } while (objectsToWrite.size() > 0) { COSBase nextObject = objectsToWrite.removeFirst(); objectsToWriteSet.remove(nextObject); doWriteObject(nextObject); } }
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/** * This will write the trailer to the PDF document. * * @param doc The document to create the trailer for. * * @throws IOException If there is an IOError while writing the document. * @throws COSVisitorException If there is an error while generating the * data./*from ww w.ja v a2s.c om*/ */ protected void doWriteTrailer(COSDocument doc) throws IOException, COSVisitorException { getStandardOutput().write(TRAILER); getStandardOutput().writeEOL(); COSDictionary trailer = doc.getTrailer(); //sort xref, needed only if object keys not regenerated Collections.sort(getXRefEntries()); COSWriterXRefEntry lastEntry = getXRefEntries().get(getXRefEntries().size() - 1); trailer.setInt(COSName.SIZE, (int) lastEntry.getKey().getNumber() + 1); // Only need to stay, if an incremental update will be performed if (!incrementalUpdate) { trailer.removeItem(COSName.PREV); } // Remove a checksum if present trailer.removeItem(COSName.DOC_CHECKSUM); trailer.accept(this); }
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/**
 * Writes the xref section for the PDF file.
 *
 * <p>When {@code doc.isXRefStream()} is true, a single section starting at
 * object 0 is written: the null entry first, then every sorted xref entry,
 * with a null entry emitted for each object number missing between two
 * consecutive entries. Otherwise {@code COSName.PREV} is set to
 * {@code doc.getStartXref()}, the null entry is added to the xref entries,
 * and the entries are written in the (start, count) ranges returned by
 * {@code getXRefRanges}.
 *
 * <p>todo support for incremental writing?
 *
 * @param doc The document to write the xref from.
 *
 * @throws IOException If there is an error writing the data to the stream.
 */
protected void doWriteXRef(COSDocument doc) throws IOException {
    if (doc.isXRefStream()) {
        // sort xref, needed only if object keys not regenerated
        Collections.sort(getXRefEntries());
        COSWriterXRefEntry lastEntry = getXRefEntries().get(getXRefEntries().size() - 1);

        // remember the position where x ref is written
        setStartxref(getStandardOutput().getPos());

        getStandardOutput().write(XREF);
        getStandardOutput().writeEOL();
        // write start object number and object count for this x ref section
        // we assume starting from scratch
        writeXrefRange(0, lastEntry.getKey().getNumber() + 1);
        // write initial start object with ref to first deleted object and magic generation number
        writeXrefEntry(COSWriterXRefEntry.getNullEntry());
        // write entry for every object
        long lastObjectNumber = 0;
        for (COSWriterXRefEntry entry : getXRefEntries()) {
            long objectNumber = entry.getKey().getNumber();
            // Pad every missing object number with a null entry. The counter
            // must advance on each padded entry, otherwise a gap in the
            // object numbers would keep this loop running forever.
            while (lastObjectNumber < objectNumber - 1) {
                writeXrefEntry(COSWriterXRefEntry.getNullEntry());
                lastObjectNumber++;
            }
            lastObjectNumber = objectNumber;
            writeXrefEntry(entry);
        }
    } else {
        COSDictionary trailer = doc.getTrailer();
        trailer.setLong(COSName.PREV, doc.getStartXref());
        addXRefEntry(COSWriterXRefEntry.getNullEntry());

        // sort xref, needed only if object keys not regenerated
        Collections.sort(getXRefEntries());

        // remember the position where x ref was written
        setStartxref(getStandardOutput().getPos());

        getStandardOutput().write(XREF);
        getStandardOutput().writeEOL();
        // write start object number and object count for this x ref section
        // we assume starting from scratch
        Integer[] xRefRanges = getXRefRanges(getXRefEntries());
        int xRefLength = xRefRanges.length;
        int x = 0;
        int j = 0;
        // ranges come as (start, count) pairs; nothing is written for an odd-length array
        while (x < xRefLength && (xRefLength % 2) == 0) {
            writeXrefRange(xRefRanges[x], xRefRanges[x + 1]);
            for (int i = 0; i < xRefRanges[x + 1]; ++i) {
                writeXrefEntry(xRefEntries.get(j++));
            }
            x += 2;
        }
    }
}
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
private void doWriteXRefInc(COSDocument doc, long hybridPrev) throws IOException, COSVisitorException { if (doc.isXRefStream() || hybridPrev != -1) { // the file uses XrefStreams, so we need to update // it with an xref stream. We create a new one and fill it // with data available here // first set an entry for the null entry in the xref table // this is probably not necessary // addXRefEntry(COSWriterXRefEntry.getNullEntry()); // create a new XRefStrema object PDFXRefStream pdfxRefStream = new PDFXRefStream(); // add all entries from the incremental update. List<COSWriterXRefEntry> xRefEntries2 = getXRefEntries(); for (COSWriterXRefEntry cosWriterXRefEntry : xRefEntries2) { pdfxRefStream.addEntry(cosWriterXRefEntry); }/*from w w w.j a v a2 s.c o m*/ COSDictionary trailer = doc.getTrailer(); // trailer.setLong(COSName.PREV, hybridPrev == -1 ? prev : hybridPrev); trailer.setLong(COSName.PREV, doc.getStartXref()); pdfxRefStream.addTrailerInfo(trailer); // the size is the highest object number+1. 
we add one more // for the xref stream object we are going to write pdfxRefStream.setSize(getNumber() + 2); setStartxref(getStandardOutput().getPos()); COSStream stream2 = pdfxRefStream.getStream(); doWriteObject(stream2); } if (!doc.isXRefStream() || hybridPrev != -1) { COSDictionary trailer = doc.getTrailer(); trailer.setLong(COSName.PREV, doc.getStartXref()); if (hybridPrev != -1) { COSName xrefStm = COSName.XREF_STM; trailer.removeItem(xrefStm); trailer.setLong(xrefStm, getStartxref()); } addXRefEntry(COSWriterXRefEntry.getNullEntry()); // sort xref, needed only if object keys not regenerated Collections.sort(getXRefEntries()); // remember the position where x ref was written setStartxref(getStandardOutput().getPos()); getStandardOutput().write(XREF); getStandardOutput().writeEOL(); // write start object number and object count for this x ref section // we assume starting from scratch Integer[] xRefRanges = getXRefRanges(getXRefEntries()); int xRefLength = xRefRanges.length; int x = 0; int j = 0; while (x < xRefLength && (xRefLength % 2) == 0) { writeXrefRange(xRefRanges[x], xRefRanges[x + 1]); for (int i = 0; i < xRefRanges[x + 1]; ++i) { writeXrefEntry(xRefEntries.get(j++)); } x += 2; } } }
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/** * The visit from document method.//ww w. jav a 2 s . co m * * @param doc The object that is being visited. * * @throws COSVisitorException If there is an exception while visiting this * object. * * @return null */ public Object visitFromDocument(COSDocument doc) throws COSVisitorException { try { if (!incrementalUpdate) { doWriteHeader(doc); } doWriteBody(doc); // get the previous trailer COSDictionary trailer = doc.getTrailer(); long hybridPrev = -1; if (trailer != null) { hybridPrev = trailer.getLong(COSName.XREF_STM); } if (incrementalUpdate) { doWriteXRefInc(doc, hybridPrev); } else { doWriteXRef(doc); } // the trailer section should only be used for xref tables not for xref streams if (!incrementalUpdate || !doc.isXRefStream() || hybridPrev != -1) { doWriteTrailer(doc); } // write endof getStandardOutput().write(STARTXREF); getStandardOutput().writeEOL(); getStandardOutput().write(String.valueOf(getStartxref()).getBytes("ISO-8859-1")); getStandardOutput().writeEOL(); getStandardOutput().write(EOF); getStandardOutput().writeEOL(); if (incrementalUpdate) { doWriteSignature(doc); } return null; } catch (IOException e) { throw new COSVisitorException(e); } catch (SignatureException e) { throw new COSVisitorException(e); } }
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/** * This will write the pdf document.//from www .ja va 2s. c om * * @param doc The document to write. * @param idTime The time seed used to generate the id * * @throws COSVisitorException If an error occurs while generating the data. */ public void write(PDDocument doc, long idTime) throws COSVisitorException { document = doc; if (incrementalUpdate) { prepareIncrement(doc); } // if the document says we should remove encryption, then we shouldn't encrypt if (doc.isAllSecurityToBeRemoved()) { this.willEncrypt = false; // also need to get rid of the "Encrypt" in the trailer so readers // don't try to decrypt a document which is not encrypted COSDocument cosDoc = doc.getDocument(); COSDictionary trailer = cosDoc.getTrailer(); trailer.removeItem(COSName.ENCRYPT); } else { SecurityHandler securityHandler = document.getSecurityHandler(); if (securityHandler != null) { try { securityHandler.prepareDocumentForEncryption(document); this.willEncrypt = true; } catch (IOException e) { throw new COSVisitorException(e); } catch (CryptographyException e) { throw new COSVisitorException(e); } } else { this.willEncrypt = false; } } COSDocument cosDoc = document.getDocument(); COSDictionary trailer = cosDoc.getTrailer(); COSArray idArray = (COSArray) trailer.getDictionaryObject(COSName.ID); if (idArray == null || incrementalUpdate) { try { //algorithm says to use time/path/size/values in doc to generate //the id. 
We don't have path or size, so do the best we can MessageDigest md = MessageDigest.getInstance("MD5"); md.update(Long.toString(idTime).getBytes("ISO-8859-1")); COSDictionary info = (COSDictionary) trailer.getDictionaryObject(COSName.INFO); if (info != null) { Iterator<COSBase> values = info.getValues().iterator(); while (values.hasNext()) { md.update(values.next().toString().getBytes("ISO-8859-1")); } } idArray = new COSArray(); COSString id = new COSString(md.digest()); idArray.add(id); idArray.add(id); trailer.setItem(COSName.ID, idArray); } catch (NoSuchAlgorithmException e) { throw new COSVisitorException(e); } catch (UnsupportedEncodingException e) { throw new COSVisitorException(e); } } cosDoc.accept(this); }
From source file:FeatureExtraction.FeatureExtractorPDFStructuralPaths.java
/** * Return true if the PDF is compatible/*from w w w. j a v a 2 s. com*/ * * @param filePath pdf file path * @return true if the PDF is compatible */ public boolean IsCompatiblePDF2(String filePath) { File pdfFile = new File(filePath); Map<String, Integer> structuralPaths = new HashMap<>(); HashSet<COSBase> visitedObjects = new HashSet<>(); boolean compatible = true; PDDocument pdf = new PDDocument(); COSDocument pdfDocument = new COSDocument(); String parentPath = ""; String pdfObjectName = "Trailer"; try { pdf = PDDocument.load(pdfFile); pdfDocument = pdf.getDocument(); ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), pdfObjectName, parentPath, structuralPaths, visitedObjects, 1); } catch (IOException e) { compatible = false; } finally { try { pdf.close(); pdfDocument.close(); } catch (IOException e) { Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e); } } return compatible; }
From source file:FeatureExtraction.FeatureExtractorPDFStructuralPathsInputStream.java
@Override public Map ExtractFeaturesFrequencyFromSingleElement(T element) { Map<String, Integer> structuralPaths = new HashMap<>(); HashSet<COSBase> visitedObjects = new HashSet<>(); InputStream fileInputStream = (InputStream) element; String filePath = ""; try {/*from w w w. ja v a 2 s. c o m*/ switch (m_parserType) { case Sequential: try (PDDocument pdf = PDDocument.load(fileInputStream)) { COSDocument pdfDocument = pdf.getDocument(); ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "", structuralPaths, visitedObjects); //ExtractPDFStructuralPathsQUEUE(pdfDocument.getTrailer().getCOSObject(), structuralPaths); } catch (Exception e) { Console.PrintException(String.format("Error parsing PDF file: '%s'", filePath), e); } break; case NonSequential: File randomAccessFile = new File(filePath + ".ra"); RandomAccess randomAccess = new RandomAccessFile(randomAccessFile, "rwd"); try (PDDocument pdf = PDDocument.loadNonSeq(fileInputStream, randomAccess)) { COSDocument pdfDocument = pdf.getDocument(); ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "", structuralPaths, visitedObjects); //ExtractPDFStructuralPathsQUEUE(pdfDocument.getTrailer().getCOSObject(), structuralPaths); } catch (Exception e) { Console.PrintException(String.format("Error parsing PDF file: '%s'", filePath), e); } finally { randomAccessFile.delete(); } break; } } catch (IOException e) { Console.PrintException(String.format("Error parsing PDF file: '%s'", filePath), e); } return structuralPaths; }
From source file:FeatureExtraction.FeatureExtractorPDFStructuralPathsOld.java
@Override public Map ExtractFeaturesFrequencyFromSingleElement(T element) { Map<String, Integer> structuralPaths = new HashMap<>(); HashSet<COSBase> visitedObjects = new HashSet<>(); String filePath = (String) element; File pdfFile = new File(filePath); try {// w w w.ja v a 2 s. c om switch (m_parserType) { case Sequential: try (PDDocument pdf = PDDocument.load(pdfFile)) { COSDocument pdfDocument = pdf.getDocument(); ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "", structuralPaths, visitedObjects); } catch (Exception e) { Console.PrintException(String.format("Error parsing PDF file: '%s'", filePath), e); } break; case NonSequential: File randomAccessFile = new File(filePath + ".ra"); RandomAccess randomAccess = new RandomAccessFile(randomAccessFile, "rwd"); try (PDDocument pdf = PDDocument.loadNonSeq(pdfFile, randomAccess)) { COSDocument pdfDocument = pdf.getDocument(); ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "", structuralPaths, visitedObjects); } catch (Exception e) { Console.PrintException(String.format("Error parsing PDF file: '%s'", filePath), e); } finally { randomAccessFile.delete(); } break; } } catch (IOException e) { Console.PrintException(String.format("Error parsing PDF file: '%s'", filePath), e); } return structuralPaths; }
From source file:FeatureExtraction.FeatureExtractorPDFStructuralPathsOld.java
/** * Return true if the PDF is compatible/*w w w. j a v a 2 s. c om*/ * * @param filePath pdf file path * @return true if the PDF is compatible */ public boolean IsCompatiblePDF2(String filePath) { File pdfFile = new File(filePath); Map<String, Integer> structuralPaths = new HashMap<>(); HashSet<COSBase> visitedObjects = new HashSet<>(); boolean compatible = true; PDDocument pdf = new PDDocument(); COSDocument pdfDocument = new COSDocument(); try { pdf = PDDocument.load(pdfFile); pdfDocument = pdf.getDocument(); ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "", structuralPaths, visitedObjects); } catch (IOException e) { compatible = false; } finally { try { pdf.close(); pdfDocument.close(); } catch (IOException e) { Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e); } } return compatible; }