// ParsingSample.java : » PDF » PDFClown-0.0.5 » it » stefanochizzolini » clown » samples » Java Open Source

// Java Open Source » PDF » PDFClown 0.0.5
// PDFClown 0.0.5 » it » stefanochizzolini » clown » samples » ParsingSample.java
package it.stefanochizzolini.clown.samples;

import it.stefanochizzolini.clown.documents.Document;
import it.stefanochizzolini.clown.documents.Page;
import it.stefanochizzolini.clown.documents.Pages;
import it.stefanochizzolini.clown.documents.contents.Contents;
import it.stefanochizzolini.clown.documents.contents.Resources;
import it.stefanochizzolini.clown.documents.contents.objects.CompositeObject;
import it.stefanochizzolini.clown.documents.contents.objects.ContentObject;
import it.stefanochizzolini.clown.documents.contents.objects.Operation;
import it.stefanochizzolini.clown.documents.interchange.metadata.Information;
import it.stefanochizzolini.clown.files.File;
import it.stefanochizzolini.clown.objects.PdfDictionary;
import it.stefanochizzolini.clown.objects.PdfIndirectObject;
import it.stefanochizzolini.clown.objects.PdfName;
import it.stefanochizzolini.clown.objects.PdfReference;
import it.stefanochizzolini.clown.tokens.FileFormatException;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
  This sample demonstrates how to inspect the structure of a PDF document.
  <h3>Remarks</h3>
  <p>This implementation is just a limited exercise: see the API documentation
  for the full range of inspection functionality.</p>
  <p>The sample prints, in order: the document information (metadata), a tally of
  the file's indirect objects grouped by the class of their data object, and the
  details of the middle page and of the page following it (if any).</p>
*/
public class ParsingSample
  implements ISample
{
  /** Maximum number of content operations printed for each page. */
  private static final int MAX_PRINTED_OPERATIONS = 10;

  /** Counter key under which the not-in-use (free) entries are tallied. */
  private static final String FREE_ENTRY_KEY = "xref free entry";

  /**
    Asks the user for a PDF file, opens it and prints its structure to the standard output.

    @param loader Sample loader used to prompt the user for the PDF file to inspect.
    @throws RuntimeException If the chosen file cannot be opened or has a bad format.
  */
  public void run(
    PDFClownSampleLoader loader
    )
  {
    // (boilerplate user choice -- ignore it)
    String filePath = loader.getPdfFileChoice("Please select a PDF file");

    // 1. Open the PDF file!
    File file = openFile(filePath);

    // 2. Parsing the document...
    // Get the PDF document!
    Document document = file.getDocument();
    // 2.1. Showing basic metadata...
    printDocumentInformation(document);
    // 2.2. Counting the indirect objects, grouping them by type...
    printIndirectObjectCounts(file);
    // 2.3. Showing some page information...
    printPagesInformation(document);
  }

  /**
    Opens the PDF file at the given path, converting any failure into an unchecked exception
    that keeps the original cause.

    @param filePath Path of the PDF file to open.
    @return The opened file.
    @throws RuntimeException If the file has a bad format or cannot be accessed.
  */
  private File openFile(
    String filePath
    )
  {
    try{return new File(filePath);}
    catch(FileFormatException e){throw new RuntimeException(filePath + " file has a bad file format.",e);}
    catch(Exception e){throw new RuntimeException(filePath + " file access error.",e);}
  }

  /**
    Prints author, title, subject and creation date of the document, or a notice when
    the document exposes no information object.

    @param document Document whose information is printed.
  */
  private void printDocumentInformation(
    Document document
    )
  {
    System.out.println("\nDocument information:");
    Information info = document.getInformation();
    if(info == null)
    {
      System.out.println("No information available (Info dictionary doesn't exist).");
      return;
    }

    System.out.println("Author: " + info.getAuthor());
    System.out.println("Title: " + info.getTitle());
    System.out.println("Subject: " + info.getSubject());
    System.out.println("CreationDate: " + info.getCreationDate());
  }

  /**
    Iterates through the indirect objects of the file and prints how many of them exist
    per data-object class, plus the number of free entries and the overall total.

    @param file File whose indirect objects are tallied.
  */
  private void printIndirectObjectCounts(
    File file
    )
  {
    System.out.println("\nIterating through the indirect-object collection (please wait)...");

    Map<String,Integer> objCounters = new HashMap<String,Integer>();
    // Seeded so that the free-entry line is always printed, even when its count is zero.
    objCounters.put(FREE_ENTRY_KEY,0);
    for(PdfIndirectObject object : file.getIndirectObjects())
    {
      if(object.isInUse()) // In-use entry.
      {increment(objCounters, object.getDataObject().getClass().getName());}
      else // Free entry.
      {increment(objCounters, FREE_ENTRY_KEY);}
    }

    System.out.println("\nIndirect objects partial counts (grouped by PDF object type):");
    for(Map.Entry<String,Integer> entry : objCounters.entrySet())
    {System.out.println(" " + entry.getKey() + ": " + entry.getValue());}
    System.out.println("Indirect objects total count: " + file.getIndirectObjects().size());
  }

  /**
    Adds one to the counter stored under the given key, creating it when missing.

    @param counters Counters to update.
    @param key Key of the counter to increment.
  */
  private static void increment(
    Map<String,Integer> counters,
    String key
    )
  {
    Integer count = counters.get(key);
    counters.put(key, count == null ? 1 : count + 1);
  }

  /**
    Prints the page count of the document, then the details of its middle page and of
    the page following it (if any).

    @param document Document whose pages are inspected.
  */
  private void printPagesInformation(
    Document document
    )
  {
    Pages pages = document.getPages();
    int pageCount = pages.size();
    System.out.println("\nPage count: " + pageCount);
    // An empty page collection has no middle page: asking for index 0 would fail.
    if(pageCount == 0)
    {
      System.out.println("No pages available (document is empty).");
      return;
    }

    // Integer division already rounds down for a non-negative count.
    int pageIndex = pageCount / 2;
    Page page = pages.get(pageIndex);
    System.out.println("Mid page:");
    printPageInfo(page,pageIndex);

    pageIndex++;
    if(pageIndex < pageCount)
    {
      System.out.println("Next page:");
      printPageInfo(page.getNext(),pageIndex);
    }
  }

  /**
    Prints index, ID, dictionary keys, leading content operations and resource counts
    of the given page.

    @param page Page to describe.
    @param index Expected page index, printed next to the one reported by the page itself.
  */
  private void printPageInfo(
    Page page,
    int index
    )
  {
    // 1. Showing basic page information...
    System.out.println(" Index (calculated): " + page.getIndex() + " (should be " + index + ")");
    System.out.println(" ID: " + ((PdfReference)page.getBaseObject()).getID());
    PdfDictionary pageDictionary = page.getBaseDataObject();
    System.out.println(" Dictionary entries:");
    for(PdfName key : pageDictionary.keySet())
    {System.out.println("  " + key.getValue());}

    // 2. Showing page contents information...
    Contents contents = page.getContents();
    int contentCount = contents.size();
    System.out.println(" Content objects count: " + contentCount);
    System.out.println(" Content head (operations):");
    /*
      NOTE: The position within the top-level content objects and the number of operations
      printed so far are tracked separately. A composite object may print several operations
      (or none at all), so using a single variable for both would either skip top-level
      objects or never advance.
    */
    int operationCount = 0;
    for(int contentIndex = 0;
      contentIndex < contentCount
        && operationCount < MAX_PRINTED_OPERATIONS;
      contentIndex++)
    {operationCount = printContentObject(contents.get(contentIndex),operationCount,0);}

    // 3. Showing page resources information...
    Resources resources = page.getResources();
    System.out.println(" Resources:");
    Map<?,?> subResources = resources.getFonts();
    if(subResources != null)
    {System.out.println("  Font count: " + subResources.size());}

    subResources = resources.getXObjects();
    if(subResources != null)
    {System.out.println("  XObjects count: " + subResources.size());}

    subResources = resources.getColorSpaces();
    if(subResources != null)
    {System.out.println("  ColorSpaces count: " + subResources.size());}
  }

  /**
    Prints the given content object: a simple operation is printed as a numbered line,
    whereas a composite object is printed as a braced group whose inner objects are
    printed recursively, one indentation level deeper.

    @param content Content object to print.
    @param index Number of operations printed so far.
    @param level Nesting level, used as indentation width.
    @return Number of operations printed so far, including the ones printed by this call.
  */
  private int printContentObject(
    ContentObject content,
    int index,
    int level
    )
  {
    String indentation;
    {
      StringBuilder buffer = new StringBuilder(level);
      for(int i = 0; i < level; i++)
      {buffer.append(' ');}
      indentation = buffer.toString();
    }

    /*
      NOTE: Contents are expressed through both simple operations and composite objects.
    */
    if(content instanceof Operation)
    {System.out.println("   " + indentation + (++index) + ": " + content.toString());}
    else if(content instanceof CompositeObject)
    {
      System.out.println(
        "   " + indentation + content.getClass().getSimpleName()
          + "\n   " + indentation + "{"
        );
      List<? extends ContentObject> objects = ((CompositeObject)content).getObjects();
      for(ContentObject obj : objects)
      {
        index = printContentObject(obj,index,level+1);
        // Stop descending as soon as the printing limit has been reached.
        if(index >= MAX_PRINTED_OPERATIONS)
          break;
      }
      System.out.println("   " + indentation + "}");
    }
    return index;
  }
}
// java2s.com | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.