Example usage for org.apache.poi.util LittleEndian getShort

List of usage examples for org.apache.poi.util LittleEndian getShort

Introduction

On this page you can find example usages of org.apache.poi.util LittleEndian getShort.

Prototype

public static short getShort(byte[] data) 

Source Link

Document

Gets a short value from the beginning of a byte array.

Usage

From source file:com.krawler.esp.fileparser.word.Word6CHPBinTable.java

License:Open Source License

/**
 * Loads the character-property (CHP) bin table of a Word document.
 * <p>
 * Each bin table entry is read as a 16-bit page number, converted into a byte
 * offset by multiplying it with {@code POIFSConstants.LARGER_BIG_BLOCK_SIZE},
 * and every CHPX of the formatted disk page at that offset is appended to
 * {@code _textRuns}.
 *
 * @param documentStream
 *            The POIFS "WordDocument" stream from a Word document
 * @param offset
 *            The offset of the Chp bin table in the main stream.
 * @param size
 *            The size of the Chp bin table in the main stream.
 * @param fcMin
 *            The start of text in the main stream.
 */
public Word6CHPBinTable(byte[] documentStream, int offset, int size, int fcMin) {
    // NOTE(review): the trailing 2 is presumably the byte width of one entry - confirm against PlexOfCps.
    final PlexOfCps plex = new PlexOfCps(documentStream, offset, size, 2);
    final int entryCount = plex.length();

    // No table stream is supplied to the temporary text piece table built for each page.
    final byte[] noTableStream = null;
    final int noTableSize = 0;

    int entry = 0;
    while (entry < entryCount) {
        final GenericPropertyNode binEntry = plex.getProperty(entry);

        // NOTE(review): getShort yields a signed short, so a page number above 32767
        // becomes negative here - confirm whether an unsigned read is intended.
        final int fkpPage = LittleEndian.getShort((byte[]) binEntry.getBytes());
        final int fkpOffset = POIFSConstants.LARGER_BIG_BLOCK_SIZE * fkpPage;

        final CHPFormattedDiskPage fkp = new CHPFormattedDiskPage(documentStream, fkpOffset, fcMin,
                new TextPieceTable(documentStream, noTableStream, fkpOffset, noTableSize, fcMin));

        final int runCount = fkp.size();
        for (int run = 0; run < runCount; run++) {
            _textRuns.add(fkp.getCHPX(run));
        }

        entry++;
    }
}

From source file:com.krawler.esp.fileparser.word.Word6CHPBinTable.java

License:Open Source License

/**
 * Loads the character-property (CHP) bin table of a Word document, using a
 * caller-supplied text piece table.
 * <p>
 * Each bin table entry is read as a 16-bit page number, converted into a byte
 * offset by multiplying it with {@code POIFSConstants.LARGER_BIG_BLOCK_SIZE},
 * and every CHPX of the formatted disk page at that offset is appended to
 * {@code _textRuns}.
 *
 * @param documentStream
 *            The stream the bin table and the formatted disk pages are read from.
 * @param offset
 *            The offset handed to the PlexOfCps that represents the bin table.
 * @param size
 *            The size handed to the PlexOfCps that represents the bin table.
 * @param fcMin
 *            Forwarded unchanged to every CHPFormattedDiskPage.
 * @param tpt
 *            The text piece table forwarded to every CHPFormattedDiskPage.
 */
public Word6CHPBinTable(byte[] documentStream, int offset, int size, int fcMin, TextPieceTable tpt) {
    // NOTE(review): the trailing 2 is presumably the byte width of one entry - confirm against PlexOfCps.
    final PlexOfCps plex = new PlexOfCps(documentStream, offset, size, 2);
    final int entryCount = plex.length();

    int entry = 0;
    while (entry < entryCount) {
        final GenericPropertyNode binEntry = plex.getProperty(entry);

        // NOTE(review): getShort yields a signed short, so a page number above 32767
        // becomes negative here - confirm whether an unsigned read is intended.
        final int fkpPage = LittleEndian.getShort((byte[]) binEntry.getBytes());
        final int fkpOffset = POIFSConstants.LARGER_BIG_BLOCK_SIZE * fkpPage;

        final CHPFormattedDiskPage fkp = new CHPFormattedDiskPage(documentStream, fkpOffset, fcMin, tpt);

        final int runCount = fkp.size();
        for (int run = 0; run < runCount; run++) {
            _textRuns.add(fkp.getCHPX(run));
        }

        entry++;
    }
}

From source file:com.krawler.esp.fileparser.wordparser.Word6CHPBinTable.java

License:Open Source License

/**
 * Loads the character-property (CHP) bin table of a Word document.
 * <p>
 * For every bin table entry the 16-bit page number is scaled by
 * {@code POIFSConstants.LARGER_BIG_BLOCK_SIZE} to obtain the offset of a
 * formatted disk page, whose CHPX entries are then appended to
 * {@code _textRuns}.
 *
 * @param documentStream
 *            The POIFS "WordDocument" stream from a Word document
 * @param offset
 *            The offset of the Chp bin table in the main stream.
 * @param size
 *            The size of the Chp bin table in the main stream.
 * @param fcMin
 *            The start of text in the main stream.
 * @throws UnsupportedEncodingException
 *             declared by this constructor; presumably raised by one of the
 *             constructors it calls - TODO confirm which.
 */
public Word6CHPBinTable(byte[] documentStream, int offset, int size, int fcMin)
        throws UnsupportedEncodingException {
    // NOTE(review): the trailing 2 is presumably the byte width of one entry - confirm against PlexOfCps.
    final PlexOfCps plex = new PlexOfCps(documentStream, offset, size, 2);

    for (int entry = 0, entryCount = plex.length(); entry < entryCount; entry++) {
        final GenericPropertyNode binEntry = plex.getProperty(entry);

        // NOTE(review): getShort yields a signed short, so a page number above 32767
        // becomes negative here - confirm whether an unsigned read is intended.
        final int fkpPage = LittleEndian.getShort((byte[]) binEntry.getBytes());
        final int fkpOffset = POIFSConstants.LARGER_BIG_BLOCK_SIZE * fkpPage;

        // A throwaway text piece table is built per page, with no table stream and size 0.
        final byte[] noTableStream = null;
        final int noTableSize = 0;
        final TextPieceTable pagePieces = new TextPieceTable(documentStream, noTableStream, fkpOffset,
                noTableSize, fcMin);
        final CHPFormattedDiskPage fkp = new CHPFormattedDiskPage(documentStream, fkpOffset, fcMin,
                pagePieces);

        final int runCount = fkp.size();
        int run = 0;
        while (run < runCount) {
            _textRuns.add(fkp.getCHPX(run));
            run++;
        }
    }
}

From source file:com.progdan.doc2txt.chp.Word6CHPBinTable.java

License:Apache License

/**
 * Loads the character-property (CHP) bin table of a Word document.
 * <p>
 * For every bin table entry the 16-bit page number is scaled by
 * {@code POIFSConstants.BIG_BLOCK_SIZE} to obtain the offset of a formatted
 * disk page, whose CHPX entries are then appended to {@code _textRuns}.
 *
 * @param documentStream The POIFS "WordDocument" stream from a Word document
 * @param offset The offset of the Chp bin table in the main stream.
 * @param size The size of the Chp bin table in the main stream.
 * @param fcMin The start of text in the main stream.
 */
public Word6CHPBinTable(byte[] documentStream, int offset, int size, int fcMin) {
    // NOTE(review): the trailing 2 is presumably the byte width of one entry - confirm against PlexOfCps.
    final PlexOfCps plex = new PlexOfCps(documentStream, offset, size, 2);
    final int entryCount = plex.length();

    int entry = 0;
    while (entry < entryCount) {
        final GenericPropertyNode binEntry = plex.getProperty(entry);

        // NOTE(review): getShort yields a signed short, so a page number above 32767
        // becomes negative here - confirm whether an unsigned read is intended.
        final int fkpPage = LittleEndian.getShort((byte[]) binEntry.getBytes());
        final int fkpOffset = POIFSConstants.BIG_BLOCK_SIZE * fkpPage;

        final CHPFormattedDiskPage fkp = new CHPFormattedDiskPage(documentStream, fkpOffset, fcMin);

        final int runCount = fkp.size();
        for (int run = 0; run < runCount; run++) {
            _textRuns.add(fkp.getCHPX(run));
        }

        entry++;
    }
}

From source file:org.textmining.extraction.word.chp.Word6CHPBinTable.java

License:Open Source License

/**
 * Loads the character-property (CHP) bin table of a Word document and sorts
 * the collected runs.
 * <p>
 * For every bin table entry the 16-bit page number is scaled by
 * {@code POIFSConstants.SMALLER_BIG_BLOCK_SIZE} to obtain the offset of a
 * formatted disk page, whose CHPX entries are appended to {@code _textRuns}.
 * Once all pages are processed, {@code helper.sortNodes(_textRuns, false)} is
 * invoked.
 *
 * @param documentStream The POIFS "WordDocument" stream from a Word document
 * @param offset The offset of the Chp bin table in the main stream.
 * @param size The size of the Chp bin table in the main stream.
 * @param fcMin The start of text in the main stream.
 * @param helper Forwarded to every CHPFormattedDiskPage and used to sort
 *               {@code _textRuns} at the end.
 */
public Word6CHPBinTable(byte[] documentStream, int offset, int size, int fcMin, NodeHelper helper) {
    // NOTE(review): the trailing 2 is presumably the byte width of one entry - confirm against PlexOfCps.
    final PlexOfCps plex = new PlexOfCps(documentStream, offset, size, 2);
    final int entryCount = plex.length();

    int entry = 0;
    while (entry < entryCount) {
        final GenericPropertyNode binEntry = plex.getProperty(entry);

        // NOTE(review): getShort yields a signed short, so a page number above 32767
        // becomes negative here - confirm whether an unsigned read is intended.
        final int fkpPage = LittleEndian.getShort((byte[]) binEntry.getBytes());
        final int fkpOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * fkpPage;

        // TODO fix this
        final CHPFormattedDiskPage fkp = new CHPFormattedDiskPage(documentStream, fkpOffset, fcMin, helper);

        final int runCount = fkp.size();
        for (int run = 0; run < runCount; run++) {
            _textRuns.add(fkp.getCHPX(run));
        }

        entry++;
    }

    helper.sortNodes(_textRuns, false);
}