package it.stefanochizzolini.clown.samples;
import it.stefanochizzolini.clown.documents.Document;
import it.stefanochizzolini.clown.documents.Page;
import it.stefanochizzolini.clown.documents.Pages;
import it.stefanochizzolini.clown.documents.contents.Contents;
import it.stefanochizzolini.clown.documents.contents.Resources;
import it.stefanochizzolini.clown.documents.contents.objects.CompositeObject;
import it.stefanochizzolini.clown.documents.contents.objects.ContentObject;
import it.stefanochizzolini.clown.documents.contents.objects.Operation;
import it.stefanochizzolini.clown.documents.interchange.metadata.Information;
import it.stefanochizzolini.clown.files.File;
import it.stefanochizzolini.clown.objects.PdfDictionary;
import it.stefanochizzolini.clown.objects.PdfIndirectObject;
import it.stefanochizzolini.clown.objects.PdfName;
import it.stefanochizzolini.clown.objects.PdfReference;
import it.stefanochizzolini.clown.tokens.FileFormatException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
  This sample demonstrates how to inspect the structure of a PDF document.
  <h3>Remarks</h3>
  <p>This implementation is just a limited exercise: see the API documentation
  to perform all the possible access functionalities.</p>
  <p>The sample prints, in this order: the document information, the count of
  indirect objects grouped by the class of their data object, and some details
  (dictionary entries, first content operations, resource counts) about the
  middle page of the document and the page that follows it.</p>
*/
public class ParsingSample
  implements ISample
{
  /** Maximum number of content operations printed for each inspected page. */
  private static final int CONTENT_HEAD_LIMIT = 10;

  /** Counter key under which the free cross-reference entries are tallied. */
  private static final String FREE_ENTRY_KEY = "xref free entry";

  /**
    Asks the user for a PDF file, opens it and prints its structure to the standard output.

    @param loader Sample loader used to prompt the user for the PDF file to inspect.
    @throws RuntimeException If the chosen file cannot be opened or has a bad file format
      (the original exception is preserved as the cause).
  */
  public void run(
    PDFClownSampleLoader loader
    )
  {
    // (boilerplate user choice -- ignore it)
    String filePath = loader.getPdfFileChoice("Please select a PDF file");

    // 1. Open the PDF file!
    File file;
    try
    {file = new File(filePath);}
    catch(FileFormatException e)
    {throw new RuntimeException(filePath + " file has a bad file format.",e);}
    catch(Exception e)
    {throw new RuntimeException(filePath + " file access error.",e);}

    // 2. Parsing the document...
    // Get the PDF document!
    Document document = file.getDocument();

    // 2.1. Showing basic metadata...
    System.out.println("\nDocument information:");
    Information info = document.getInformation();
    if(info == null)
    {System.out.println("No information available (Info dictionary doesn't exist).");}
    else
    {
      System.out.println("Author: " + info.getAuthor());
      System.out.println("Title: " + info.getTitle());
      System.out.println("Subject: " + info.getSubject());
      System.out.println("CreationDate: " + info.getCreationDate());
    }

    System.out.println("\nIterating through the indirect-object collection (please wait)...");

    // 2.2. Counting the indirect objects, grouping them by type...
    Map<String,Integer> objCounters = new HashMap<String,Integer>();
    // The free-entry counter is seeded so that it is reported even when it stays at zero.
    objCounters.put(FREE_ENTRY_KEY,0);
    for(PdfIndirectObject object : file.getIndirectObjects())
    {
      // In-use entries are grouped by the class name of their data object;
      // free entries share a single dedicated counter.
      String counterKey = object.isInUse()
        ? object.getDataObject().getClass().getName()
        : FREE_ENTRY_KEY;
      Integer currentCount = objCounters.get(counterKey);
      objCounters.put(counterKey, currentCount == null ? 1 : currentCount + 1);
    }
    System.out.println("\nIndirect objects partial counts (grouped by PDF object type):");
    for(Map.Entry<String,Integer> entry : objCounters.entrySet())
    {System.out.println(" " + entry.getKey() + ": " + entry.getValue());}
    System.out.println("Indirect objects total count: " + file.getIndirectObjects().size());

    // 2.3. Showing some page information...
    Pages pages = document.getPages();
    int pageCount = pages.size();
    System.out.println("\nPage count: " + pageCount);
    if(pageCount == 0)
    {
      // There is no middle page to show: asking for index 0 would be out of range.
      System.out.println("No pages available.");
      return;
    }

    // Integer division already rounds down, yielding the middle page index.
    int pageIndex = pageCount / 2;
    Page page = pages.get(pageIndex);
    System.out.println("Mid page:");
    printPageInfo(page,pageIndex);

    int nextPageIndex = pageIndex + 1;
    if(nextPageIndex < pageCount)
    {
      System.out.println("Next page:");
      printPageInfo(page.getNext(),nextPageIndex);
    }
  }

  /**
    Prints the index, the ID, the dictionary keys, the first content operations
    and the resource counts of the given page.

    @param page Page to inspect.
    @param index Expected page index, printed next to the one calculated by the page itself.
  */
  private void printPageInfo(
    Page page,
    int index
    )
  {
    // 1. Showing basic page information...
    System.out.println(" Index (calculated): " + page.getIndex() + " (should be " + index + ")");
    System.out.println(" ID: " + ((PdfReference)page.getBaseObject()).getID());
    PdfDictionary pageDictionary = page.getBaseDataObject();
    System.out.println(" Dictionary entries:");
    for(PdfName key : pageDictionary.keySet())
    {System.out.println(" " + key.getValue());}

    // 2. Showing page contents information...
    Contents contents = page.getContents();
    int contentCount = contents.size();
    System.out.println(" Content objects count: " + contentCount);
    System.out.println(" Content head (operations):");
    /*
      NOTE: The position within the top-level content objects and the number of
      operations printed so far are tracked separately. A composite object may
      contribute zero or many operations, so using the operation counter as the
      object position would either never advance (empty composite object) or
      skip top-level objects.
    */
    int operationIndex = 0;
    for(
      int contentIndex = 0;
      contentIndex < contentCount
        && operationIndex < CONTENT_HEAD_LIMIT;
      contentIndex++
      )
    {operationIndex = printContentObject(contents.get(contentIndex),operationIndex,0);}

    // 3. Showing page resources information...
    Resources resources = page.getResources();
    System.out.println(" Resources:");
    // NOTE(review): getResources() presumably may return null when no resources
    // are associated with the page -- guarded defensively; confirm against the API.
    if(resources != null)
    {
      Map<?,?> subResources = resources.getFonts();
      if(subResources != null)
      {System.out.println(" Font count: " + subResources.size());}

      subResources = resources.getXObjects();
      if(subResources != null)
      {System.out.println(" XObjects count: " + subResources.size());}

      subResources = resources.getColorSpaces();
      if(subResources != null)
      {System.out.println(" ColorSpaces count: " + subResources.size());}
    }
  }

  /**
    Prints a content object: an operation is printed as a numbered line, whereas
    a composite object is printed as a braced group whose nested objects are
    printed recursively, one indentation level deeper.

    @param content Content object to print.
    @param index Number of operations printed so far.
    @param level Nesting level, expressed as the number of indentation spaces.
    @return Number of operations printed so far, including those printed by this call.
  */
  private int printContentObject(
    ContentObject content,
    int index,
    int level
    )
  {
    String indentation;
    {
      StringBuilder buffer = new StringBuilder(level);
      for(int i = 0; i < level; i++)
      {buffer.append(' ');}
      indentation = buffer.toString();
    }

    /*
      NOTE: Contents are expressed through both simple operations and composite objects.
    */
    if(content instanceof Operation)
    {System.out.println(" " + indentation + (++index) + ": " + content);}
    else if(content instanceof CompositeObject)
    {
      System.out.println(
        " " + indentation + content.getClass().getSimpleName()
          + "\n " + indentation + "{"
        );
      List<? extends ContentObject> objects = ((CompositeObject)content).getObjects();
      for(ContentObject object : objects)
      {
        index = printContentObject(object,index,level + 1);
        // Stop descending as soon as the head limit has been reached.
        if(index >= CONTENT_HEAD_LIMIT)
          break;
      }
      System.out.println(" " + indentation + "}");
    }
    return index;
  }
}
|