com.pnf.plugin.ole.parser.StreamReader.java Source code

Java tutorial

Introduction

Here is the source code for com.pnf.plugin.ole.parser.StreamReader.java

Source

/*******************************************************************************
 * Copyright (c) 2015 PNF Software, Inc.
 *
 *     https://www.pnfsoftware.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package com.pnf.plugin.ole.parser;

import static com.pnf.plugin.ole.parser.streams.Stream.COMP_OBJ;
import static com.pnf.plugin.ole.parser.streams.Stream.DOC_SUMM;
import static com.pnf.plugin.ole.parser.streams.Stream.OBJ_INFO;
import static com.pnf.plugin.ole.parser.streams.Stream.OLE;
import static com.pnf.plugin.ole.parser.streams.Stream.SUMM_INFO;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.poi.hpsf.CustomProperties;
import org.apache.poi.hpsf.CustomProperty;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hssf.dev.BiffParser;
import org.apache.poi.hssf.dev.BiffParser.RecordState;

import com.pnf.plugin.ole.FormatTypes;
import com.pnf.plugin.ole.display.ByteObject;
import com.pnf.plugin.ole.parser.officeart.OfficeArt;
import com.pnf.plugin.ole.parser.officeart.OfficeArtFactory;
import com.pnf.plugin.ole.parser.ppt.CurrentUserAtom;
import com.pnf.plugin.ole.parser.ppt.DocumentAtom;
import com.pnf.plugin.ole.parser.ppt.DocumentContainer;
import com.pnf.plugin.ole.parser.ppt.HandoutContainer;
import com.pnf.plugin.ole.parser.ppt.NotesContainer;
import com.pnf.plugin.ole.parser.ppt.PersistDirectoryAtom;
import com.pnf.plugin.ole.parser.ppt.PersistDirectoryEntry;
import com.pnf.plugin.ole.parser.ppt.RecordFactory;
import com.pnf.plugin.ole.parser.ppt.RecordHeader;
import com.pnf.plugin.ole.parser.ppt.UserEditAtom;
import com.pnf.plugin.ole.parser.streams.DocumentStream;
import com.pnf.plugin.ole.parser.streams.Stream;
import com.pnf.plugin.ole.parser.streams.VbaContainerStream;
import com.pnf.plugin.ole.parser.vba.VBAMacroExtractor;
import com.pnf.plugin.ole.parser.xls.BiffRecord;
import com.pnf.plugin.ole.relay.AbstractStreamable;
import com.pnf.plugin.ole.relay.Message;
import com.pnf.plugin.ole.relay.MessageHandler;
import com.pnfsoftware.jeb.core.output.ItemClassIdentifiers;
import com.pnfsoftware.jeb.core.output.tree.INode;

public class StreamReader {
    // Known stream constants
    // NOTE(review): presumably the byte length of the CompObj stream header; it is not used in this part of the file - confirm
    private static final int COMP_HEADER_LENGTH = 28;

    // Other constants
    // Hexadecimal digits in ascending order (presumably indexed by nibble value 0-15).
    // The table previously read "0123456789acbdef", which swapped the digits for 0xB and 0xC.
    private static final char[] HEX_CHARS = "0123456789abcdef".toCharArray();

    // Fields
    // Parsed results keyed by the stream they were produced from (written by addEntry, read by getEntry)
    private Map<Stream, ProcessEntry> processedMap;
    // Buffer offsets already visited through repositionBuffer; checked while scanning for dead PPT records
    private Set<Integer> parsedOffsets;
    // Atom built by processCurrentUser; processPptDocument returns null while this is unset
    private CurrentUserAtom currentUser;
    // Handler most recently installed through setMessageHandler (presumably where addMessage reports to)
    private MessageHandler msgHandler;

    /**
     * Creates a reader that has no processed streams and no recorded offsets yet.
     */
    public StreamReader() {
        this.parsedOffsets = new HashSet<>();
        this.processedMap = new HashMap<>();
    }

    /**
     * Looks up the processing result stored for a stream.
     *
     * @param e stream used as the lookup key
     * @return the entry stored for {@code e}, or {@code null} when nothing has been stored for it
     */
    public ProcessEntry getEntry(Stream e) {
        final ProcessEntry stored = this.processedMap.get(e);
        return stored;
    }

    /**
     * Stores the processing result for a stream, replacing any result stored earlier under the same key.
     *
     * @param key   stream the result belongs to
     * @param value result to store
     */
    private void addEntry(Stream key, ProcessEntry value) {
        this.processedMap.put(key, value);
    }

    /**
     * Runs the generic stream parsers over every stream that has not been processed yet.
     * A buffer underflow while parsing one stream is reported as a warning and does not stop the loop.
     *
     * @param entries streams to examine
     * @return always {@code true}
     */
    public boolean processGeneric(Set<DocumentStream> entries) {
        for (DocumentStream candidate : entries) {
            if (candidate.isProcessed()) {
                continue;
            }

            try {
                // Messages raised while parsing are routed to the stream being parsed
                setMessageHandler(candidate);
                boolean handled = processGeneric(candidate);
                candidate.setProcessed(handled);
            } catch (BufferUnderflowException underflow) {
                addMessage("Encountered error while parsing generic stream.", "", Message.WARNING);
            }
        }

        return true;
    }

    /**
     * Parses a single stream if it is one of the known generic streams
     * (ObjInfo, Ole, CompObj, DocumentSummaryInformation, SummaryInformation constants).
     *
     * @param e stream to parse
     * @return {@code true} if the stream was recognised and its parser returned a node list
     */
    private boolean processGeneric(DocumentStream e) {
        final List<INode> parsed;

        if (Stream.isSameStream(e, OBJ_INFO)) {
            parsed = readObjInfoStream(e.getBuffer());
        } else if (Stream.isSameStream(e, OLE)) {
            parsed = readOleStream(e.getBuffer());
        } else if (Stream.isSameStream(e, COMP_OBJ)) {
            parsed = readCompObjStream(e.getBuffer());
        } else if (Stream.isSameStream(e, DOC_SUMM)) {
            parsed = readDocSummaryStream(e.getBuffer());
        } else if (Stream.isSameStream(e, SUMM_INFO)) {
            parsed = readSummaryInfoStream(e.getBuffer());
        } else {
            // Not a stream this method knows how to parse
            return false;
        }

        if (parsed == null) {
            return false;
        }

        // Recognised streams are flagged for expanded view before the result is stored
        e.setExpandView(true);
        boolean expand = e.getExpandView();
        addEntry(e, new ProcessEntry(parsed, FormatTypes.OLE_STREAM, false, expand));
        return true;
    }

    /**
     * Runs the XLS specific parser (Workbook, etc) over every stream that has not been processed yet.
     *
     * @param streams streams to examine
     * @return always {@code true}
     */
    public boolean processXLSStreams(Set<DocumentStream> streams) {
        for (DocumentStream current : streams) {
            if (current.isProcessed()) {
                continue;
            }

            setMessageHandler(current);
            boolean handled = processXLSStream(current);
            current.setProcessed(handled);
        }

        return true;
    }

    /**
     * Parses a stream if it is the XLS workbook stream and stores the resulting records.
     *
     * @param s stream to parse
     * @return {@code true} if {@code s} is the workbook stream and a record list was obtained for it
     */
    private boolean processXLSStream(DocumentStream s) {
        if (!Stream.isSameStream(s, Stream.XLS_WORKBOOK)) {
            return false;
        }

        List<INode> workbookRecords = processWorkbook(s);
        if (workbookRecords == null) {
            return false;
        }

        addEntry(s, new ProcessEntry(workbookRecords, FormatTypes.XLS_STREAM, true, s.getExpandView()));
        return true;
    }

    /**
     * Runs the PPT specific parsers (current user, pictures, document) over every stream that has
     * not been processed yet. A failure while parsing one stream is reported as a warning and the
     * remaining streams are still examined.
     *
     * @param streams streams to examine
     * @return always {@code true}
     */
    public boolean processPPTStreams(Set<DocumentStream> streams) {
        // Process PPT specific streams
        for (DocumentStream s : streams) {
            if (!s.isProcessed()) {
                try {
                    setMessageHandler(s);
                    s.setProcessed(processPPTStream(s));
                } catch (BufferUnderflowException | NullPointerException | IllegalArgumentException e) {
                    // IllegalArgumentException: repositionBuffer seeks to offsets read from the file,
                    // and ByteBuffer.position(int) rejects offsets outside the buffer
                    addMessage("Encountered fatal error parsing PPT stream.", "", Message.WARNING);
                }
            }
        }

        return true;
    }

    /**
     * Dispatches a stream to the matching PowerPoint parser and stores the result.
     *
     * @param s stream to parse
     * @return {@code true} if the stream was recognised and its parser returned a node list
     */
    private boolean processPPTStream(DocumentStream s) {
        final List<INode> parsed;

        if (Stream.isSameStream(s, Stream.PPT_CURR_USER)) {
            parsed = processCurrentUser(s);
        } else if (Stream.isSameStream(s, Stream.PPT_PICTURES)) {
            parsed = processPictures(s);
        } else if (Stream.isSameStream(s, Stream.PPT_DOC)) {
            parsed = processPptDocument(s);
        } else {
            parsed = null;
        }

        if (parsed == null) {
            return false;
        }

        addEntry(s, new ProcessEntry(parsed, FormatTypes.PPT_STREAM, false, s.getExpandView()));
        return true;
    }

    /**
     * Installs the handler that is current for subsequent parsing.
     *
     * @param handler handler to install
     */
    private void setMessageHandler(MessageHandler handler) {
        this.msgHandler = handler;
    }

    /* Powerpoint processing */
    /**
     * Parses the stream as a CurrentUserAtom, keeps the atom for later document parsing and
     * flags the stream for expanded view.
     *
     * @param entry stream holding the atom
     * @return fixed-size list containing the atom's node
     */
    private List<INode> processCurrentUser(DocumentStream entry) {
        currentUser = new CurrentUserAtom(entry, entry.getBuffer(), "CurrentUserAtom");
        entry.setExpandView(true);

        INode[] single = { currentUser.getEntry() };
        return Arrays.asList(single);
    }

    /**
     * Parses the stream as a sequence of OfficeArt records, registers each record as embedded
     * data of the stream and groups their nodes under a single "rgfb" root.
     *
     * @param entry stream holding the records
     * @return list containing the root node
     */
    private List<INode> processPictures(DocumentStream entry) {
        ByteBuffer data = entry.getBuffer();

        // Offset and size are captured before the records are read from the buffer
        String offset = StreamEntry.toHex(data.position());
        String length = String.valueOf(data.remaining());
        StreamEntry root = new StreamEntry("rgfb", "array", "", offset, length);

        for (OfficeArt art : OfficeArtFactory.createRecords(entry, data)) {
            entry.addEmbeddedData(art);
            root.addChild(art.getEntry());
        }

        List<INode> result = new LinkedList<INode>();
        result.add(root);
        return result;
    }

    /**
     * Parses the PowerPoint document stream following the procedure of MS-PPT 2.1.2.
     * <p>
     * Live records are found by walking the chain of UserEditAtom records that starts at the
     * offset held by the previously parsed CurrentUserAtom; every other record found by a
     * linear scan of the stream is listed as a dead record.
     *
     * @param entry stream holding the document
     * @return a list with the "Live Records" node followed by the "Dead Records" node, or
     *         {@code null} when no CurrentUserAtom has been parsed yet, no UserEditAtom could be
     *         read, or the document persist object is not listed in the persist object directory
     */
    private List<INode> processPptDocument(DocumentStream entry) {
        if (currentUser == null)
            return null;

        ByteBuffer src = entry.getBuffer();
        List<INode> records = new LinkedList<>();
        StreamEntry liveRecs = new StreamEntry("Live Records");
        StreamEntry pObjDirectory = new StreamEntry("Persist Object Directory");

        // Processing as per MS-PPT 2.1.2
        // Part 1: Construct the persist object directory
        int leOffset = currentUser.getCurrentEditOffset();
        int start = leOffset;

        String nameFormat = "Container";
        UserEditAtom firstAtom = null;

        // Directory atoms in stream-age order: each older atom is pushed to the front
        LinkedList<PersistDirectoryAtom> directoryAtoms = new LinkedList<>();
        // Edit offsets already followed, so a chain that loops back on itself terminates
        Set<Integer> visitedEdits = new HashSet<>();

        // Each edit contributes two containers, hence the index advances by two
        for (int i = 0; leOffset != 0; i += 2) {
            if (!visitedEdits.add(leOffset)) {
                addMessage("UserEditAtom chain refers back to an edit that was already parsed.",
                        StreamEntry.toHex(leOffset), Message.WARNING);
                break;
            }

            // Process records using current information
            repositionBuffer(src, leOffset);
            UserEditAtom currEdit = new UserEditAtom(entry, src, nameFormat + " [" + i + "]", true);
            pObjDirectory.addChild(currEdit.getEntry());

            // The first atom read is the most recent edit
            if (firstAtom == null)
                firstAtom = currEdit;

            repositionBuffer(src, currEdit.getPersistDirectoryOffset());
            PersistDirectoryAtom pdAtom = new PersistDirectoryAtom(entry, src, nameFormat + " [" + (i + 1) + "]",
                    true);
            directoryAtoms.addFirst(pdAtom);

            pObjDirectory.addChild(pdAtom.getEntry());

            // set up for processing next atom
            leOffset = currEdit.getLastEditOffset();
        }

        // Part 1, Step 8
        // Merge the persist object id -> persist object offset mappings starting with the oldest
        // directory atom, so that mappings from more recent edits replace the older ones
        Map<Integer, Integer> globalOffsetMap = new HashMap<>();
        for (PersistDirectoryAtom pdAtom : directoryAtoms)
            for (PersistDirectoryEntry pdEntry : pdAtom.getDirectoryEntries())
                globalOffsetMap.putAll(pdEntry.getOffsetMap());

        liveRecs.addChild(pObjDirectory);

        // Part 2: Identify the document persist object
        if (firstAtom == null) {
            addMessage("Could not parse initial UserEditAtom", StreamEntry.toHex(start), Message.WARNING);
            return null;
        }

        int docPersistId = firstAtom.getDocumentObjectId();
        Integer docPersistOffset = globalOffsetMap.get(docPersistId);
        if (docPersistOffset == null) {
            addMessage("Document persist object is not listed in the persist object directory.",
                    StreamEntry.toHex(start), Message.WARNING);
            return null;
        }
        repositionBuffer(src, docPersistOffset);

        DocumentContainer docPersistObject = new DocumentContainer(entry, src, "Document Persist Object", true);
        DocumentAtom docAtom = docPersistObject.getDocumentAtom();
        liveRecs.addChild(docPersistObject.getEntry());

        // Part 3: Identify the notes master slide persist object
        if (docAtom.getNotesPersistId().getValue() != 0) {
            Integer notesOffset = globalOffsetMap.get(docAtom.getNotesPersistId().getValue());
            if (notesOffset == null) {
                addMessage("Notes master slide persist object is not listed in the persist object directory.",
                        "", Message.WARNING);
            } else {
                repositionBuffer(src, notesOffset);
                NotesContainer ntContainer = new NotesContainer(entry, src, "Notes Master Slide Persist Object",
                        true, true);
                liveRecs.addChild(ntContainer.getEntry());
            }
        }

        // Part 4: Identify the handout master slide persist object
        if (docAtom.getHandoutPersistId().getValue() != 0) {
            Integer handoutOffset = globalOffsetMap.get(docAtom.getHandoutPersistId().getValue());
            if (handoutOffset == null) {
                addMessage("Handout master slide persist object is not listed in the persist object directory.",
                        "", Message.WARNING);
            } else {
                repositionBuffer(src, handoutOffset);
                HandoutContainer hdContainer = new HandoutContainer(entry, src,
                        "Handout Master Slide Persist Object", true);
                liveRecs.addChild(hdContainer.getEntry());
            }
        }

        // Part 5: Identify the main master slide and the title master slide persist objects
        // TODO Implement full parsing of record/sub-record for code below to work
        /*

        StreamEntry masterPersistObj = new StreamEntry("Master Slide Persist Objects");
        MasterListWithTextContainer mlContainer = docPersistObject.getMasterList();
        masterPersistObj.addChild(mlContainer.getEntry());

        for(MasterPersistAtom mpAtom: mlContainer.getAtoms()){
           int offset = globalOffsetMap.get(mpAtom.getPersistId().getValue());
           repositionBuffer(src, offset);

           MasterOrSlideContainer curr = MasterOrSlideContainer.create(entry, src, "Main Master/Slide Persist Object
           masterPersistObj.addChild(curr);
        }

        liveRecs.addChild(masterPersistObj);

         */

        // Part 6: Identify the presentation slide persist objects
        // TODO Implement full record/sub-record parsing for code below to work
        /*

        SlideListWithTextContainer slideContainer = docPersistObject.getSlideList();
        if(slideContainer != null){
           StreamEntry slidePersistObj = new StreamEntry("Presentation Slide Persist Objects");
           for(SlideListWithTextSubContainerOrAtom slAtom: slideContainer.getAtoms()){
          int offset = globalOffsetMap.get(slAtom.getPersistId().getValue());
          repositionBuffer(src, offset);

          SlideContainer curr = new SlideContainer(entry, src, "Slide Persist Object");
          slidePersistObj.addChild(curr);
           }

           liveRecs.addChild(slidePersistObj);
        }

         */

        // Part 7: Identify the notes slide persist objects
        // TODO Implement full record/sub-record parsing for code below to work
        /*

         NotesListWithTextContainer notesContainer = docPersistObject.getNotesList();
         if(notesContainer != null){
        StreamEntry notesPersistObj = new StreamEntry("Notes Persist Objects");
        for(NotesPersistAtom npAtom: notesContainer.getAtoms()){
           int offset = globalOffsetMap.get(npAtom.getPersistId().getValue());
           repositionBuffer(src, offset);

          NotesContainer curr = new NotesContainer(entry, src, "Notes Slide/Master Persist Object", false);
          notesPersistObj.addChild(curr);
        }

        liveRecs.addChild(notesPersistObj);
         }

         */

        // Part 8: Identify the ActiveX control persist objects
        // TODO Implement full record/sub-record parsing for code below to work
        /*
         ExObjListContainer objList = docPersistObj.getExternalObjectsList();
         if(objList != null){
        StreamEntry externalObjects = new StreamEntry("ActiveX Control Persist Objects");
        for(ExControlContainer container: objList.getControlContainers()){
           ExOleObjAtom exAtom = container.getOleObjAtom();
           int offset = globalOffsetMap.get(exAtom.getPersistId().getValue());
           repositionBuffer(src, offset);
          ExControlStg curr = new ExControlStg(entry, src, "ActiveX Storage Persist Object");
          externalObjects.addChild(curr);
        }

        liveRecs.addChild(externalObjects);

        // Part 9: Identify the embedded OLE object persist objects
        // TODO Implement full record/sub-record parsing for code below to work

           StreamEntry oleObjects = new StreamEntry("Embedded OLE Persist Objects");
        for(ExOleEmbedContainer container: objList.getOleContainers()){
           ExOleObjAtom exAtom = container.getOleObjAtom();
           int offset = globalOffsetMap.get(exAtom.getPersistId().getValue());
           repositionBuffer(src, offset);

          ExOleObjStg curr = new ExControlStg(entry, src, "OLE Persist Object");
          oleObjects.addChild(curr);
        }

        liveRecs.addChild(oleObjects);
         }

        // Part 10: Identify the linked OLE object persist objects
        // TODO Implement full record/sub-record parsing for code below to work

           StreamEntry linkedObjects = new StreamEntry("Linked OLE Persist Objects");
        for(ExOleLinkContainer container: objList.getOleLinkedContainers()){
           ExOleObjAtom exAtom = container.getOleObjAtom();
           int offset = globalOffsetMap.get(exAtom.getPersistId().getValue());
           repositionBuffer(src, offset);

          ExOleObjStg curr = new ExControlStg(entry, src, "OLE Persist Object");
          linkedObjects.addChild(curr);
        }

        liveRecs.addChild(linkedObjects);
         }

         */

        // Part 11: Identify the VBA project persist object
        // TODO Implement full record/sub-record parsing for code below to work
        /*

        DocInfoListContainer infoContainer = docPersistObject.getSlideList();
        if(infoContainer != null){
           VbaInfoContainer vbaInfo = infoContainer.getVbaInfoContainer();
           if(vbaInfo != null){
          VbaInfoAtom vbaAtom = vbaInfo.getAtom();
          int offset = globalOffsetMap.get(vbaAtom.getPersistId());
          repositionBuffer(src, offset);

          VbaProjectStg projectStg = new VbaProjectStg(entry, src, "VBA Project Persist Object");
          liveRecs.addChild(projectStg.getEntry());
           }
        }

         */

        // Parse the rest of the records, starting at the beginning
        src.rewind();
        StreamEntry deadRecs = new StreamEntry("Dead Records");

        try {
            for (int i = 0; src.remaining() >= RecordHeader.SIZE; i++) {
                int base = src.position();
                RecordHeader header = new RecordHeader(src, "rh");

                // Offsets visited while collecting live records are skipped over, not reported again
                if (!parsedOffsets.contains(base)) {
                    AbstractStreamable record = RecordFactory.createRecord(entry, src, header,
                            "Dead record [" + i + "]");
                    if (record != null) {
                        deadRecs.addChild(record);

                        if (record instanceof ByteObject)
                            entry.addEmbeddedData((ByteObject) record);
                    }
                }

                //Advance to the next record
                int next = base + RecordHeader.SIZE + header.getRecLen();
                if (next <= base) {
                    // A length that does not move the scan forward would repeat records forever
                    addMessage("Parsed record with invalid length; stopped scanning for dead records.",
                            StreamEntry.toHex(base), Message.WARNING);
                    break;
                }
                src.position(next);
            }
        } catch (BufferUnderflowException e) {
            addMessage("Parsing encountered an error", StreamEntry.toHex(src.position()), Message.WARNING);
        } catch (IllegalArgumentException e) {
            addMessage("Parsed record with length that went beyond end of stream.",
                    StreamEntry.toHex(src.position()), Message.WARNING);
        }

        records.add(liveRecs);
        records.add(deadRecs);

        return records;
    }

    /**
     * Seeks the buffer to an offset and records that offset as already parsed.
     *
     * @param src    buffer to reposition
     * @param target absolute position to move to
     */
    private void repositionBuffer(ByteBuffer src, int target) {
        // Seek first; the offset is only recorded once the seek has succeeded
        src.position(target);

        // Remember the offset in the set of already parsed offsets
        this.parsedOffsets.add(Integer.valueOf(target));
    }

    /* Excel processing */
    /**
     * Parses the workbook stream into BIFF records.
     * <p>
     * When the parser reports an error, a message is raised at the offset of the last record
     * that was read; if no record is available the message carries an empty offset.
     *
     * @param workbook stream holding the workbook
     * @return the records returned by the parser
     */
    private List<INode> processWorkbook(DocumentStream workbook) {
        BiffParser parser = new BiffParser();

        RecordState state = parser.getRecords(new ByteArrayInputStream(workbook.getBuffer().array()), null);
        List<INode> entries = state.getRecords();

        if (state.hadError()) {
            // The error may occur before any record was read, so the last record cannot be assumed to exist
            String lastOffset = "";
            if (entries != null && !entries.isEmpty()) {
                INode last = entries.get(entries.size() - 1);
                if (last instanceof BiffRecord)
                    lastOffset = StreamEntry.toHex(((BiffRecord) last).getOffset());
            }

            addMessage(
                    "Encountered error while processing BIFF records in " + Stream.XLS_WORKBOOK
                            + " stream. Could not continue parsing BIFF records.",
                    lastOffset, Message.CORRUPT);
        }

        return entries;
    }

    /* VBA processing */
    /**
     * Processes the streams of a VBA container: the project stream is parsed into property
     * nodes, and the dir stream is handed to the macro extractor whose results are stored per module.
     *
     * @param stream     container providing the dir and project streams (either may be absent)
     * @param vbaEntries streams of the container keyed by name; extracted macros are stored
     *                   against the stream registered under the same key
     */
    public void processVba(VbaContainerStream stream, Map<String, DocumentStream> vbaEntries) {
        // First process all the streams we know *may* be there
        DocumentStream dir = stream.getDirStream();
        DocumentStream project = stream.getProjectStream();

        if (project != null) {
            List<INode> projectNodes = readVbaProjectStream(project.getBuffer());
            addEntry(project, new ProcessEntry(projectNodes, FormatTypes.VBA_PROJECT_STREAM, false,
                    project.getExpandView()));
        }

        if (dir == null) {
            return;
        }

        Map<String, String> macros = VBAMacroExtractor
                .extractMacros(new ByteArrayInputStream(dir.getBuffer().array()), vbaEntries);
        if (macros == null) {
            return;
        }

        // Store every module the extractor found
        for (Entry<String, String> macro : macros.entrySet()) {
            ProcessEntry module = new ProcessEntry(macro.getValue(), FormatTypes.VBA_MODULE);
            addEntry(vbaEntries.get(macro.getKey()), module);
        }
    }

    /**
     * Parses the text of a VBA PROJECT stream into display nodes.
     * <p>
     * The stream is read line by line. A line of the form {@code [name]} opens a section; each
     * following {@code key=value} line becomes a "Property" node under that section. Properties
     * that appear before any section are returned as top-level nodes. A section is added to
     * the result only once it has received its first property. Lines without {@code =} are ignored.
     *
     * @param buffer buffer whose backing array holds the stream contents
     * @return nodes in the order they were encountered; never {@code null}
     */
    private List<INode> readVbaProjectStream(ByteBuffer buffer) {
        // PROJECT stream is just several key-value pairs
        List<INode> list = new LinkedList<>();

        // NOTE(review): the bytes are decoded with the platform default charset - confirm this is intended
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new ByteArrayInputStream(buffer.array())))) {
            StreamEntry parent = null;
            boolean added = false;
            String line;

            while ((line = reader.readLine()) != null) {
                if (line.isEmpty())
                    continue;

                String trimmed = line.trim();
                if (trimmed.startsWith("[") && trimmed.endsWith("]")) { // create sub level
                    // Later lines are read by the main loop, so a header at the very end of the
                    // stream simply ends the parse
                    added = false;
                    parent = new StreamEntry(line, StreamEntry.EMPTY_CELL, StreamEntry.EMPTY_CELL,
                            StreamEntry.EMPTY_CELL, StreamEntry.EMPTY_CELL);
                    continue;
                }

                // Split on the first '=' only, so values that contain '=' or are empty are kept
                String[] keyPair = line.split("=", 2);
                if (keyPair.length != 2)
                    continue;

                StreamEntry current = new StreamEntry(keyPair[0], "Property", keyPair[1], StreamEntry.EMPTY_CELL,
                        StreamEntry.EMPTY_CELL);
                if (parent == null) {
                    list.add(current);
                    continue;
                }

                parent.addChild(current);
                if (!added) {
                    added = true;
                    list.add(parent);
                }
            }
        } catch (IOException ignored) {
            // Reading from an in-memory array is not expected to fail; whatever was parsed so far is returned
        }

        return list;
    }

    /**
     * Decodes an ObjInfo stream into display nodes.
     * <p>
     * The stream is read little-endian as: ODTPersist1 (2 bytes of bit flags), cf (2 bytes,
     * format used to transmit data to the host application) and, only when at least two more
     * bytes are available, ODTPersist2 (2 bytes of bit flags).
     * <p>
     * The byte order of {@code stream} is switched to little-endian as a side effect.
     *
     * @param stream buffer positioned at the start of the ObjInfo data
     * @return the nodes "cf", "ODTPersist1" and, when present, "ODTPersist2", in that order
     * @throws java.nio.BufferUnderflowException if fewer than four bytes are available
     */
    private List<INode> readObjInfoStream(ByteBuffer stream) {
        List<INode> nodes = new LinkedList<>();

        stream.order(ByteOrder.LITTLE_ENDIAN);
        byte[] oPersist1 = new byte[2];
        stream.get(oPersist1);

        short cf_short = stream.getShort();

        // ODTPersist2 is optional; it is read only when both of its bytes are available, so a
        // single stray trailing byte does not abort the parse
        byte[] oPersist2 = null;
        if (stream.remaining() >= 2) {
            oPersist2 = new byte[2];
            stream.get(oPersist2);
        }

        /*
         * cf - 2 bytes (specifies format used to transmit data to host application)
         *       0x0001 - Rich Text Format
         *       0x0002 - Text format
         *       0x0003 - Metafile or Enhanced metafile (depends on ODTPersist2.fStoredAsEMF)
         *       0x0004 - Bitmap
         *       0x0005 - Device Independent Bitmap
         *       0x000A - HTML format
         *       0x0014 - Unicode text format
         */
        String format = null; // stays null for values not listed above

        switch (cf_short) {
        case 0x0001:
            format = "Rich Text format";
            break;
        case 0x0002:
            format = "Text format";
            break;
        case 0x0003:
            format = "Metafile or Enhanced metafile (see ODTPersist2.fStoredAsEMF)";
            break;
        case 0x0004:
            format = "Bitmap";
            break;
        case 0x0005:
            format = "Device Independent Bitmap";
            break;
        case 0x000A:
            format = "HTML format";
            break;
        case 0x0014:
            format = "Unicode text format";
            break;
        default:
            break;
        }

        StreamEntry cfEntry = new StreamEntry("cf", "short", StreamEntry.toHex(cf_short), "0x2", "2");
        cfEntry.addChild(new StreamEntry("format", "String", format));
        nodes.add(cfEntry);

        final String flag = "flag";
        final String undefined = "undefined";
        final String mustBeZero = "undefined (MUST be zero)";

        /*
         * ODTPersist1 - 2 bytes (info about embedded OLE document), one row per bit:
         *      [0] reserved1 - undefined
         *      [1] fDefHandler - application must assume that this OLE objects class id is {}
         *      [2] reserved2 - undefined
         *      [3] reserved3 - undefined
         *      [4] fLink - specifies whether this OLE object is a link
         *      [5] reserved4 - undefined
         *      [6] fIcon - specifies whether this OLE object is being represented by an icon
         *      [7] fIsOle1 - specifies whether this OLE object is only compatible with OLE 1. If this bit is zero, then the object is compatible with OLE 2
         *      [8] fManual - specifies whether the user has requested that this OLE object only be updated in response to a user action (0 - user request updated automatically)
         *            if fLink is 0, fManual is undefined
         *      [9] fRecomposeOnResize - specifies whether this OLE object has requested to be notified when it is resized by its container
         *      [10] reserved5 - MUST be zero (and must be ignored)
         *      [11] reserved6 - MUST be zero (and must be ignored)
         *      [12] fOCX - specifies whether this object is an [OLE control] (see docs for more info)
         *      [13] fStream - if fOCX is zero, then this MUST be zero. If fOCX is 1, fStream defines whether this OLE control stores its data in a single stream instead of a storage
         *            if fStream is 1, OLE control stores data in a stream called \003OCXDATA
         *      [14] reserved7 - undefined
         *      [15] fViewObject - specifies whether this object supports the IViewObject interface
         */
        final String[][] persist1Layout = {
                { "reserved1", undefined },
                { "fDefHandler", flag },
                { "reserved2", undefined },
                { "reserved3", undefined },
                { "fLink", flag },
                { "reserved4", undefined },
                { "fIcon", flag },
                { "fIsOle1", flag },
                { "fManual", "flag (undef. if fLink=0)" },
                { "fRecomposeOnResize", flag },
                { "reserved5", mustBeZero },
                { "reserved6", mustBeZero },
                { "fOCX", flag },
                { "fStream", flag },
                { "reserved7", undefined },
                { "fViewObject", flag } };

        StreamEntry oPer1 = new StreamEntry("ODTPersist1", "struct", "", "0x0", "2");
        BitSet odtPersist1 = BitSet.valueOf(oPersist1);

        for (int bit = 0; bit < persist1Layout.length; bit++) {
            String name = persist1Layout[bit][0];
            String kind = persist1Layout[bit][1];
            String where = "0x0 [" + bit + "]";
            boolean set = odtPersist1.get(bit);

            StreamEntry child = new StreamEntry(name, kind, String.valueOf(set), where, "1 bit");
            if (mustBeZero.equals(kind))
                mustEqual(child, set ? 1 : 0, 0x0, where, name);

            oPer1.addChild(child);
        }

        nodes.add(oPer1);

        if (oPersist2 != null) {
            /* Parse ODTPersist2 object if present
             *
             * ODTPersist2 - 2 bytes (optional, only if stream size allows for it)
             *      [0] fEMF
             *      [1] reserved1 - MUST be zero and MUST be ignored
             *      [2] fQueriedEMF - specifies whether the application that saved this binary file had queried this OLE object to determine whether it supported the Enhanced Metafile format
             *      [3] fStoredAsEMF - specifies that this OLE object supports the Enhanced Metafile format
             *      [4] reserved2 - undefined
             *      [5] reserved3 - undefined
             *      [6-15] reserved4 (10 bits) - undefined
             */
            final String[][] persist2Layout = {
                    { "fEMF", flag },
                    { "reserved1", mustBeZero },
                    { "fQueriedEMF", flag },
                    { "fStoredAsEMF", flag },
                    { "reserved2", undefined },
                    { "reserved3", undefined } };

            StreamEntry oPer2 = new StreamEntry("ODTPersist2", "struct", "", "0x4", "2");
            BitSet odtPersist2 = BitSet.valueOf(oPersist2);

            for (int bit = 0; bit < persist2Layout.length; bit++) {
                String name = persist2Layout[bit][0];
                String kind = persist2Layout[bit][1];
                String where = "0x4 [" + bit + "]";
                boolean set = odtPersist2.get(bit);

                StreamEntry child = new StreamEntry(name, kind, String.valueOf(set), where, "1 bit");
                oPer2.addChild(child);
                if (mustBeZero.equals(kind))
                    mustEqual(child, set ? 1 : 0, 0x0, where, name);
            }

            // The remaining ten bits form the reserved4 field and are listed one by one
            for (int i = 6; i <= 15; i++) {
                oPer2.addChild(new StreamEntry("reserved4 [" + (i - 6) + "]", "undefined",
                        String.valueOf(odtPersist2.get(i)), "0x4 [" + i + "]", "1 bit"));
            }

            nodes.add(oPer2);
        }

        return nodes;
    }

    /**
     * Parses the leading fields of an OLE stream into display nodes.
     * <p>
     * Fields are consumed from the buffer's current position in this order: Version, Flags,
     * LinkUpdateOption, Reserved1, ReservedMonikerStreamSize and, when the encoded size is positive
     * and fits in the remaining data, the ReservedMonikerStream itself. Version is checked against
     * {@code 0x02000001} and Reserved1 against {@code 0}; a mismatch is reported through
     * {@code mustEqual}.
     *
     * @param stream buffer positioned at the start of the stream data; its position is advanced
     * @return the parsed entries, in the order they were read
     */
    private List<INode> readOleStream(ByteBuffer stream) {
        // TODO finish parsing
        List<INode> roots = new LinkedList<>();

        // --- Version ---
        int versionOffset = stream.position();
        int versionValue = stream.getInt();
        StreamEntry versionEntry = new StreamEntry("Version", "int", StreamEntry.toHex(versionValue),
                String.valueOf(versionOffset), StreamEntry.toHex(4));
        mustEqual(versionEntry, versionValue, 0x02000001, StreamEntry.toHex(versionOffset), "Version");
        roots.add(versionEntry);

        // --- Flags ---
        int flagsOffset = stream.position();
        int flagsValue = stream.getInt();
        String objectKind;
        if (flagsValue == 0) {
            objectKind = "Embedded object";
        } else if (flagsValue == 1) {
            objectKind = "Linked object";
        } else {
            objectKind = "Unknown";
        }
        StreamEntry flagsEntry = new StreamEntry("Flags", "int", StreamEntry.toHex(flagsValue),
                String.valueOf(flagsOffset), StreamEntry.toHex(4));
        flagsEntry.addChild(new StreamEntry("OLEStream Type", "", objectKind));
        roots.add(flagsEntry);

        // --- LinkUpdateOption ---
        int linkUpdateOffset = stream.position();
        int linkUpdateValue = stream.getInt();
        roots.add(new StreamEntry("LinkUpdateOption", "int", StreamEntry.toHex(linkUpdateValue),
                String.valueOf(linkUpdateOffset), StreamEntry.toHex(4)));

        // --- Reserved1 ---
        int reservedOffset = stream.position();
        int reservedValue = stream.getInt();
        StreamEntry reservedEntry = new StreamEntry("Reserved1", "int", StreamEntry.toHex(reservedValue),
                String.valueOf(reservedOffset), StreamEntry.toHex(4));
        mustEqual(reservedEntry, reservedValue, 0x0, StreamEntry.toHex(reservedOffset), "Reserved1");
        roots.add(reservedEntry);

        // --- ReservedMonikerStreamSize ---
        int sizeOffset = stream.position();
        int monikerSize = stream.getInt();
        roots.add(new StreamEntry("ReservedMonikerStreamSize", "uint32", StreamEntry.toHex(monikerSize),
                StreamEntry.toHex(sizeOffset), String.valueOf(4)));

        // Ignore rest of fields as we will only encounter the \1Ole stream in embedded objects
        if (monikerSize <= 0) {
            return roots;
        }

        if (stream.remaining() >= monikerSize) {
            MonikerStream moniker = new MonikerStream(monikerSize, stream, "ReservedMonikerStream");
            roots.add(moniker.getEntry());
        } else {
            // The size field was just consumed, so it starts 4 bytes before the current position
            addMessage("Encoded size greater than total data remaining.",
                    StreamEntry.toHex(stream.position() - 4), Message.AREA_OF_INTEREST);
        }

        return roots;
    }

    /**
     * Builds display nodes for a document summary information stream using POI's HPSF classes.
     * <p>
     * Three groups are produced, in this order: "Counts" (only the non-zero counters, and only if
     * at least one exists), "Strings", and "User-defined properties" (only if at least one custom
     * property could be read). If anything goes wrong while creating or reading the property set,
     * a {@code Message.CORRUPT} message is reported and whatever was collected so far is returned.
     *
     * @param stream buffer whose backing array holds the stream data (the whole array is parsed)
     * @return the group entries that could be built; never {@code null}
     */
    private List<INode> readDocSummaryStream(ByteBuffer stream) {
        List<INode> roots = new LinkedList<>();
        String propType = "Property";

        try {
            DocumentSummaryInformation info = (DocumentSummaryInformation) PropertySetFactory
                    .create(new ByteArrayInputStream(stream.array()));

            propType = "int";
            StreamEntry counts = new StreamEntry("Counts");
            if (info.getLineCount() != 0)
                counts.addChild(new StreamEntry("Line count", propType, String.valueOf(info.getLineCount())));

            if (info.getByteCount() != 0)
                counts.addChild(new StreamEntry("Byte count", propType, String.valueOf(info.getByteCount())));

            if (info.getHiddenCount() != 0)
                counts.addChild(new StreamEntry("Hidden count", propType, String.valueOf(info.getHiddenCount())));

            if (info.getMMClipCount() != 0)
                counts.addChild(new StreamEntry("MMClip count", propType, String.valueOf(info.getMMClipCount())));

            if (info.getNoteCount() != 0)
                counts.addChild(new StreamEntry("Note count", propType, String.valueOf(info.getNoteCount())));

            if (info.getParCount() != 0)
                counts.addChild(new StreamEntry("Par count", propType, String.valueOf(info.getParCount())));

            if (info.getSlideCount() != 0)
                counts.addChild(new StreamEntry("Slide count", propType, String.valueOf(info.getSlideCount())));

            if (counts.size() > 0)
                roots.add(counts);

            propType = "String";
            StreamEntry strings = new StreamEntry("Strings");
            strings.addChild(new StreamEntry("Category", propType, info.getCategory()));
            strings.addChild(new StreamEntry("Company", propType, info.getCompany()));
            strings.addChild(new StreamEntry("Manager", propType, info.getManager()));
            strings.addChild(new StreamEntry("Presentation Format", propType, info.getPresentationFormat()));

            roots.add(strings);

            propType = "Custom";
            StreamEntry custom = new StreamEntry("User-defined properties");
            CustomProperties props = info.getCustomProperties();

            if (props != null) {
                Set<Entry<Object, CustomProperty>> entries = props.entrySet();

                for (Entry<Object, CustomProperty> e : entries) {
                    CustomProperty prop = e.getValue();
                    if (prop == null) {
                        continue;
                    }

                    // Skip incomplete properties explicitly instead of relying on a caught
                    // NullPointerException to filter them out.
                    String propName = prop.getName();
                    Object propValue = prop.getValue();
                    if (propName == null || propValue == null) {
                        continue;
                    }

                    custom.addChild(new StreamEntry(propName, propType, propValue.toString()));
                }
            }

            // Previously this group was populated but never attached to the result, so custom
            // properties were never shown. Attach it when it has content, like "Counts".
            if (custom.size() > 0) {
                roots.add(custom);
            }
        } catch (Throwable t) {
            // Best effort: any failure while parsing is reported as a corrupt stream.
            addMessage("Attempted to read " + DOC_SUMM + " stream but no property sets were found.", null,
                    Message.CORRUPT);
        }

        return roots;
    }

    /**
     * Builds display nodes for a summary information stream using POI's HPSF classes.
     * <p>
     * Three groups are produced, in this order: "Creation Information", "Times" and
     * "Miscellaneous". If anything goes wrong while creating or reading the property set, a
     * {@code Message.CORRUPT} message is reported and whatever was collected so far is returned.
     *
     * @param stream buffer whose backing array holds the stream data (the whole array is parsed)
     * @return the group entries that could be built; never {@code null}
     */
    private List<INode> readSummaryInfoStream(ByteBuffer stream) {
        List<INode> roots = new LinkedList<>();
        String propType = "Property";

        try {
            SummaryInformation sInfo = new SummaryInformation(new PropertySet(stream.array()));

            StreamEntry cInfo = new StreamEntry("Creation Information");
            cInfo.addChild(new StreamEntry("Application Name", propType, sInfo.getApplicationName()));
            cInfo.addChild(new StreamEntry("Creation", "Time",
                    sInfo.getCreateDateTime() != null ? sInfo.getCreateDateTime().toString() : null));
            cInfo.addChild(new StreamEntry("Author", propType, sInfo.getAuthor()));
            cInfo.addChild(new StreamEntry("Last Author", propType, sInfo.getLastAuthor()));
            cInfo.addChild(new StreamEntry("Template", propType, sInfo.getTemplate()));
            roots.add(cInfo);

            propType = "Time";
            StreamEntry timeInfo = new StreamEntry("Times");
            timeInfo.addChild(new StreamEntry("Total Edit Time", propType, String.valueOf(sInfo.getEditTime())));
            timeInfo.addChild(new StreamEntry("Last Saved", propType,
                    sInfo.getLastSaveDateTime() != null ? sInfo.getLastSaveDateTime().toString() : null));
            timeInfo.addChild(new StreamEntry("Last Printed", propType,
                    sInfo.getLastPrinted() != null ? sInfo.getLastPrinted().toString() : null));
            roots.add(timeInfo);

            propType = "Misc";
            StreamEntry misc = new StreamEntry("Miscellaneous");
            misc.addChild(new StreamEntry("OS Version", "int", String.valueOf(sInfo.getOSVersion())));
            misc.addChild(new StreamEntry("Revision Number", "int", sInfo.getRevNumber()));
            misc.addChild(new StreamEntry("Page Count", "int", String.valueOf(sInfo.getPageCount())));
            misc.addChild(new StreamEntry("Word Count", "int", String.valueOf(sInfo.getWordCount())));

            // wasNull() is queried immediately after getSecurity() so that it refers to that call
            int secVal = sInfo.getSecurity();
            String security;

            if (!sInfo.wasNull()) { // Set description according to POI documentation
                switch (secVal) {
                case 0:
                    security = "No security";
                    break;
                case 1:
                    security = "Password protected";
                    break;
                case 2:
                    security = "Read-only recommended";
                    break;
                case 4:
                    security = "Read-only enforced";
                    break;
                case 8:
                    security = "Locked for annotations";
                    break;
                default:
                    // Unrecognized codes used to leave the description null, which rendered
                    // as "null (code N)" after the concatenation below.
                    security = "Unknown";
                    break;
                }

                security += " (code " + secVal + ")";
            } else {
                security = "Field not set";
            }

            misc.addChild(new StreamEntry("Document Security", "int", security));
            misc.addChild(new StreamEntry("Subject", propType, sInfo.getSubject()));
            misc.addChild(new StreamEntry("Keywords", propType, sInfo.getKeywords()));
            roots.add(misc);
        } catch (Throwable t) {
            // Best effort: any failure while parsing is reported as a corrupt stream.
            addMessage("Attempted to read " + SUMM_INFO + " stream but no property sets were found.", null,
                    Message.CORRUPT);
        }

        return roots;
    }

    /**
     * Parses a CompObj stream into display nodes.
     * <p>
     * The buffer is switched to little-endian order and the following fields are read from its
     * current position: Reserved1 (4 bytes), Version (4 bytes), Reserved2 (the bytes up to
     * {@code COMP_HEADER_LENGTH}), the length-prefixed AnsiUserType string and the
     * AnsiClipboardFormat field. The latter starts with a 4-byte MarkerOrLength value:
     * <ul>
     * <li>{@code 0}: nothing follows and no AnsiClipboardFormat entry is added;</li>
     * <li>{@code 0xFFFFFFFF} or {@code 0xFFFFFFFE}: a 4-byte standard clipboard format identifier
     * follows;</li>
     * <li>anything else: the value is passed as the byte length of the registered clipboard
     * format string that follows.</li>
     * </ul>
     * The buffer position is reset to 0 before returning so the stream can be parsed again.
     *
     * @param stream buffer positioned at the start of the stream data
     * @return the parsed entries, in the order they were read
     */
    private List<INode> readCompObjStream(ByteBuffer stream) {
        List<INode> roots = new LinkedList<>();

        stream.order(ByteOrder.LITTLE_ENDIAN);
        int pos = stream.position();

        roots.add(new StreamEntry("Reserved1", "arbitrary value", StreamEntry.toHex(stream.getInt()),
                String.valueOf(pos), "4"));
        pos = stream.position();
        roots.add(new StreamEntry("Version", "arbitrary value", StreamEntry.toHex(stream.getInt()),
                String.valueOf(pos), "4"));

        pos = stream.position();
        byte[] res2Data = new byte[COMP_HEADER_LENGTH - pos];
        stream.get(res2Data);

        StreamEntry res2 = new StreamEntry("Reserved2", "arbitrary value", toHex(res2Data), StreamEntry.toHex(pos),
                "20");
        roots.add(res2);

        EncodedString ansiUserType = getLengthPrefixedAnsiString(stream, "AnsiUserType");
        roots.add(ansiUserType.getEntry());

        // Retrieve AnsiClipboardFormat field
        int add = stream.position();
        int markerOrLength = stream.getInt();

        if (markerOrLength != 0) {
            StreamEntry e = new StreamEntry("AnsiClipboardFormat", EncodedString.CLIP_FOAS, StreamEntry.EMPTY_CELL,
                    String.valueOf(add), String.valueOf(0));

            e.addChild(new StreamEntry("MarkerOrLength", "int", String.valueOf(markerOrLength), String.valueOf(add),
                    String.valueOf(4)));

            // The marker was already decoded as a little-endian int, so compare the value itself.
            // A byte-wise comparison against FF FF FF FE would test the big-endian layout of
            // 0xFFFFFFFE and miss the real marker, which is stored as FE FF FF FF.
            boolean standardFormat = markerOrLength == 0xFFFFFFFF || markerOrLength == 0xFFFFFFFE;

            if (standardFormat) {
                // The 4-byte clipboard format identifier follows the marker; the stream is
                // already positioned there, so it must not be rewound to the marker.
                int formatPos = stream.position();

                if (stream.remaining() < 4) {
                    addMessage("Standard clipboard format identifier is truncated.",
                            StreamEntry.toHex(formatPos), Message.CORRUPT);
                } else {
                    int id = stream.getInt();
                    e.addChild(new StreamEntry("FormatOrAnsiString", "StandardClipboardFormat",
                            String.valueOf(id), String.valueOf(formatPos), String.valueOf(4)));
                }
            } else {
                // Must be an ansi string whose byte length is the value just read
                EncodedString ansiString = getString(stream, "RegisteredClipboardFormat", markerOrLength);
                e.addChild(ansiString.getEntry());
            }

            roots.add(e);
        }

        stream.position(0); // reset stream in case we want to parse again

        return roots;
    }

    /**
     * Tests whether the bytes of {@code b} beginning at {@code start} equal {@code data}.
     * <p>
     * The buffer position is moved to {@code start} and then advanced by every byte that is read,
     * so on return it sits just past the last byte compared (the first mismatch, or the end of
     * the matched sequence).
     *
     * @param b buffer to read from; its position is modified
     * @param start absolute position at which the comparison begins
     * @param data expected byte sequence
     * @return {@code true} if every byte of {@code data} matched, {@code false} on the first mismatch
     */
    private static boolean checkBytes(ByteBuffer b, int start, byte[] data) {
        b.position(start);

        for (byte expected : data) {
            byte actual = b.get();
            if (actual != expected) {
                return false;
            }
        }

        return true;
    }

    /**
     * Reads a 4-byte length from {@code data} and wraps the string that follows it.
     *
     * @param data buffer positioned at the length prefix; the prefix is consumed here
     * @param name display name given to the resulting string
     * @return an {@link EncodedString} of type {@code EncodedString.LENGTH_PAS} built with the
     *         length that was read
     */
    private static EncodedString getLengthPrefixedAnsiString(ByteBuffer data, String name) {
        final int prefixedLength = data.getInt();

        return new EncodedString(data, name, EncodedString.LENGTH_PAS, null, prefixedLength);
    }

    /**
     * Wraps a string of a known byte length taken from {@code data}.
     *
     * @param data buffer positioned at the start of the string
     * @param name display name given to the resulting string
     * @param byteLen length value handed to the {@link EncodedString} constructor
     * @return the constructed {@link EncodedString}
     */
    private static EncodedString getString(ByteBuffer data, String name, int byteLen) {
        EncodedString decoded = new EncodedString(data, name, byteLen);
        return decoded;
    }

    /**
     * Checks a field value against its required value, using a standard message of the form
     * "{@code <name> MUST be <expected in hex> but was not.}" when they differ.
     *
     * @param e entry to flag on mismatch; may be {@code null}
     * @param actual value that was read
     * @param expected value the field is required to have
     * @param address location reported with the message
     * @param name field name used in the message
     */
    private void mustEqual(StreamEntry e, long actual, long expected, String address, String name) {
        String violation = name + " MUST be " + StreamEntry.toHex(expected) + " but was not.";
        mustEqual(e, violation, actual, expected, address);
    }

    /**
     * Checks a field value against its required value and reports {@code message} when they differ.
     *
     * @param e entry to flag on mismatch; may be {@code null}
     * @param message text reported on mismatch
     * @param actual value that was read
     * @param expected value the field is required to have
     * @param address location reported with the message
     */
    private void mustEqual(StreamEntry e, String message, long actual, long expected, String address) {
        if (actual == expected) {
            return;
        }

        notEqual(e, message, address);
    }

    /**
     * Reports a {@code Message.CORRUPT} message and, if an entry is supplied, marks it with
     * {@code ItemClassIdentifiers.INFO_MALFORMED}.
     *
     * @param e entry to mark; ignored when {@code null}
     * @param message text of the reported message
     * @param address location reported with the message
     */
    private void notEqual(StreamEntry e, String message, String address) {
        Message report = new Message(message, address, Message.CORRUPT);
        msgHandler.handleMessage(report);

        if (e == null) {
            return;
        }

        e.setId(ItemClassIdentifiers.INFO_MALFORMED);
    }

    /**
     * Forwards a new message to the message handler.
     *
     * @param message text of the message
     * @param address location associated with the message; callers in this class also pass {@code null}
     * @param type message type constant, e.g. {@code Message.CORRUPT}
     */
    private void addMessage(String message, String address, int type) {
        Message report = new Message(message, address, type);
        msgHandler.handleMessage(report);
    }

    /**
     * Formats bytes as a single hexadecimal string prefixed with {@code "0x"}.
     * <p>
     * Each byte contributes two digits, high nibble first, looked up in {@code HEX_CHARS}. An
     * empty input yields just {@code "0x"}.
     *
     * @param bytes bytes to format, in output order
     * @return the prefixed hexadecimal representation
     */
    public static String toHex(byte... bytes) {
        StringBuilder hex = new StringBuilder(2 + bytes.length * 2);
        hex.append("0x");

        for (byte current : bytes) {
            int unsigned = current & 0xFF;
            hex.append(HEX_CHARS[unsigned >>> 4]);
            hex.append(HEX_CHARS[unsigned & 0x0F]);
        }

        return hex.toString();
    }
}