Example usage for the org.apache.pdfbox.cos.COSDocument constructor COSDocument()

List of usage examples for the org.apache.pdfbox.cos.COSDocument constructor COSDocument()

Introduction

On this page you can find example usages of the org.apache.pdfbox.cos.COSDocument constructor COSDocument().

Prototype

public COSDocument() 

Source Link

Document

Constructor.

Usage

From source file:FeatureExtraction.FeatureExtractorPDFStructuralPaths.java

/**
 * Return true if the PDF is compatible/*from ww  w.jav a  2  s. co  m*/
 *
 * @param filePath pdf file path
 * @return true if the PDF is compatible
 */
public boolean IsCompatiblePDF2(String filePath) {
    File pdfFile = new File(filePath);
    Map<String, Integer> structuralPaths = new HashMap<>();
    HashSet<COSBase> visitedObjects = new HashSet<>();
    boolean compatible = true;
    PDDocument pdf = new PDDocument();
    COSDocument pdfDocument = new COSDocument();
    String parentPath = "";
    String pdfObjectName = "Trailer";
    try {
        pdf = PDDocument.load(pdfFile);
        pdfDocument = pdf.getDocument();
        ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), pdfObjectName, parentPath,
                structuralPaths, visitedObjects, 1);
    } catch (IOException e) {
        compatible = false;
    } finally {
        try {
            pdf.close();
            pdfDocument.close();
        } catch (IOException e) {
            Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e);
        }
    }
    return compatible;
}

From source file:FeatureExtraction.FeatureExtractorPDFStructuralPathsOld.java

/**
 * Return true if the PDF is compatible/*from w  w w  .j a v a 2  s .  co  m*/
 *
 * @param filePath pdf file path
 * @return true if the PDF is compatible
 */
public boolean IsCompatiblePDF2(String filePath) {
    File pdfFile = new File(filePath);
    Map<String, Integer> structuralPaths = new HashMap<>();
    HashSet<COSBase> visitedObjects = new HashSet<>();
    boolean compatible = true;
    PDDocument pdf = new PDDocument();
    COSDocument pdfDocument = new COSDocument();
    try {
        pdf = PDDocument.load(pdfFile);
        pdfDocument = pdf.getDocument();
        ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "",
                structuralPaths, visitedObjects);
    } catch (IOException e) {
        compatible = false;
    } finally {
        try {
            pdf.close();
            pdfDocument.close();
        } catch (IOException e) {
            Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e);
        }
    }
    return compatible;
}

From source file:net.awl.edoc.pdfa.LInearPdf.java

License:Apache License

/**
 * Reads a COS dictionary from a fixed text file with {@code CustomParser} and prints it to
 * standard output.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the file cannot be opened, parsed or closed
 */
public static void main(String[] args) throws Exception {
    // try-with-resources guarantees the stream is closed even when parsing fails.
    try (InputStream is = new FileInputStream("/home/eric/dictionnaire.txt")) {
        // Earlier approach, kept for reference:
        // PDFStreamParser parser = new PDFStreamParser(is,new
        // RandomAccessBuffer());
        // COSDocument doc = new COSDocument();
        // parser.setDocument(doc);
        // parser.parse();
        // System.out.println(((COSDictionary)doc.getObjects().get(0)).getInt("Size"));

        CustomParser parser = new CustomParser(is);
        COSDocument doc = new COSDocument();
        try {
            parser.setDocument(doc);
            COSDictionary dic = parser.getCOSDictionary();
            System.out.println(dic.toString());
        } finally {
            // Release the document whether or not parsing succeeded.
            doc.close();
        }
    }
}

From source file:net.padaf.preflight.helpers.TrailerValidationHelper.java

License:Apache License

/**
 * Validates the trailer dictionaries of the PDF held by the handler.
 * <p>
 * For a linearized PDF the linearization dictionary is checked, the first trailer is
 * validated as the main trailer and its ID is compared with the ID of the last trailer.
 * For any other PDF only the last trailer is validated. If no trailer was extracted at
 * all, a single trailer syntax error is reported.
 *
 * @param handler gives access to the document and to the trailers extracted from it
 * @return the validation errors found; empty if there are none
 * @throws ValidationException if thrown by one of the delegated checks
 */
@Override
public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {

    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdfDoc = handler.getDocument();

    COSDictionary linearizedDict = isLinearizedPdf(pdfDoc);
    boolean linearized = linearizedDict != null;
    if (linearized) {
        // it is a linearized PDF, check the linearized dictionary
        checkLinearizedDictionnary(linearizedDict, result);
    }

    List<String> lTrailers = handler.getPdfExtractor().getAllTrailers();
    if (lTrailers == null || lTrailers.isEmpty()) {
        // Without this guard the get(...) calls below would fail with an
        // IndexOutOfBoundsException instead of producing a validation error.
        result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                "There is no trailer in the PDF file"));
        return result;
    }
    String lastTrailer = lTrailers.get(lTrailers.size() - 1);

    if (linearized) {
        // if the pdf is a linearized pdf. the first trailer must be checked
        // and it must have the same ID than the last trailer.
        String firstTrailer = lTrailers.get(0);

        COSDictionary first = null;
        COSDictionary last = null;
        COSDocument cd = null;
        try {
            cd = new COSDocument();
            PdfElementParser parser1 = new PdfElementParser(cd, firstTrailer.getBytes());
            first = parser1.parseAsDictionary();
            PdfElementParser parser2 = new PdfElementParser(cd, lastTrailer.getBytes());
            last = parser2.parseAsDictionary();

            checkMainTrailer(pdfDoc.getDocument(), first, result);
            if (!compareIds(first, last, pdfDoc.getDocument())) {
                result.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                        "ID is different in the first and the last trailer"));
            }

        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "Unable to parse trailers of the linearized PDF"));
        } finally {
            COSUtils.closeDocumentQuietly(cd);
        }

    } else {
        // If the PDF isn't a linearized one, only the last trailer must be
        // checked
        COSDocument cd = null;
        try {
            cd = new COSDocument();
            PdfElementParser parser = new PdfElementParser(cd, lastTrailer.getBytes());
            COSDictionary trailer = parser.parseAsDictionary();
            checkMainTrailer(pdfDoc.getDocument(), trailer, result);
        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "The trailer dictionary is missing"));
        } finally {
            // Same cleanup helper as the linearized branch; a direct cd.close() here
            // would throw a NullPointerException if the document was never created.
            COSUtils.closeDocumentQuietly(cd);
        }

    }
    return result;
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

/**
 * Exercises {@code COSUtils.isInteger} with an indirect object (number 10, generation 0)
 * that wraps a {@code COSInteger}.
 * <p>
 * Resolved against an {@code IOCOSDocument} the check must return false. Resolved against
 * a plain {@code COSDocument} with an xref entry for the object, the result is not
 * asserted; the test only fails if an {@link IOException} escapes.
 */
@Test
public void testIsInteger() {
    try {
        COSObject co = new COSObject(new COSInteger(10));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isInteger(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        try {
            doc.setXRef(new COSObjectKey(co), 1000);
            COSUtils.isInteger(co, doc);
        } finally {
            // Close the document even when the calls above throw.
            doc.close();
        }
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

/**
 * Exercises {@code COSUtils.isFloat} with an indirect object (number 10, generation 0)
 * that wraps a {@code COSFloat}.
 * <p>
 * Resolved against an {@code IOCOSDocument} the check must return false. Resolved against
 * a plain {@code COSDocument} with an xref entry for the object, the result is not
 * asserted; the test only fails if an {@link IOException} escapes.
 */
@Test
public void testIsFloat() {
    try {
        COSObject co = new COSObject(new COSFloat(10.0f));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isFloat(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        try {
            doc.setXRef(new COSObjectKey(co), 1000);
            COSUtils.isFloat(co, doc);
        } finally {
            // Close the document even when the calls above throw.
            doc.close();
        }
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

/**
 * Exercises {@code COSUtils.isString} with an indirect object (number 10, generation 0)
 * that wraps an empty {@code COSString}.
 * <p>
 * Resolved against an {@code IOCOSDocument} the check must return false. Resolved against
 * a plain {@code COSDocument} with an xref entry for the object, the result is not
 * asserted; the test only fails if an {@link IOException} escapes.
 */
@Test
public void testIsString() {
    try {
        COSObject co = new COSObject(new COSString(""));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isString(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        try {
            doc.setXRef(new COSObjectKey(co), 1000);
            COSUtils.isString(co, doc);
        } finally {
            // Close the document even when the calls above throw.
            doc.close();
        }
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

/**
 * Exercises {@code COSUtils.isStream} with an indirect object (number 10, generation 0)
 * that wraps a {@code COSStream}.
 * <p>
 * Resolved against an {@code IOCOSDocument} the check must return false. Resolved against
 * a plain {@code COSDocument} with an xref entry for the object, the result is not
 * asserted; the test only fails if an {@link IOException} escapes.
 */
@Test
public void testIsStream() {
    try {
        COSObject co = new COSObject(new COSStream(null));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isStream(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        try {
            doc.setXRef(new COSObjectKey(co), 1000);
            COSUtils.isStream(co, doc);
        } finally {
            // Close the document even when the calls above throw.
            doc.close();
        }
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

/**
 * Exercises {@code COSUtils.isDictionary} with an indirect object (number 10, generation 0)
 * that wraps an empty {@code COSDictionary}.
 * <p>
 * Resolved against an {@code IOCOSDocument} the check must return false. Resolved against
 * a plain {@code COSDocument} with an xref entry for the object, the result is not
 * asserted; the test only fails if an {@link IOException} escapes.
 */
@Test
public void testIsDictionary() {
    try {
        COSObject co = new COSObject(new COSDictionary());
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isDictionary(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        try {
            doc.setXRef(new COSObjectKey(co), 1000);
            COSUtils.isDictionary(co, doc);
        } finally {
            // Close the document even when the calls above throw.
            doc.close();
        }
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

/**
 * Exercises {@code COSUtils.isArray} with an indirect object (number 10, generation 0)
 * that wraps an empty {@code COSArray}.
 * <p>
 * Resolved against an {@code IOCOSDocument} the check must return false. Resolved against
 * a plain {@code COSDocument} with an xref entry for the object, the result is not
 * asserted; the test only fails if an {@link IOException} escapes.
 */
@Test
public void testIsArray() {
    try {
        COSObject co = new COSObject(new COSArray());
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isArray(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        try {
            doc.setXRef(new COSObjectKey(co), 1000);
            COSUtils.isArray(co, doc);
        } finally {
            // Close the document even when the calls above throw.
            doc.close();
        }
    } catch (IOException e) {
        fail(e.getMessage());
    }
}