List of usage examples for org.apache.poi.util LittleEndian readInt
public static int readInt(InputStream stream) throws IOException, BufferUnderrunException
From source file: org.apache.tika.parser.executable.ExecutableParser.java
License:Apache License
/** * Parses a DOS or Windows PE file/* w w w. j ava 2 s .co m*/ */ public void parsePE(XHTMLContentHandler xhtml, Metadata metadata, InputStream stream, byte[] first4) throws TikaException, IOException { metadata.add(Metadata.CONTENT_TYPE, PE_EXE.toString()); metadata.set(PLATFORM, PLATFORM_WINDOWS); // Skip over the MS-DOS bit byte[] msdosSection = new byte[0x3c - 4]; IOUtils.readFully(stream, msdosSection); // Grab the PE header offset int peOffset = LittleEndian.readInt(stream); // Sanity check - while it may go anywhere, it's normally in the first few kb if (peOffset > 4096 || peOffset < 0x3f) return; // Skip the rest of the MS-DOS stub (if PE), until we reach what should // be the PE header (if this is a PE executable) stream.skip(peOffset - 0x40); // Read the PE header byte[] pe = new byte[24]; IOUtils.readFully(stream, pe); // Check it really is a PE header if (pe[0] == (byte) 'P' && pe[1] == (byte) 'E' && pe[2] == 0 && pe[3] == 0) { // Good, has a valid PE signature } else { // Old style MS-DOS return; } // Read the header values int machine = LittleEndian.getUShort(pe, 4); int numSectors = LittleEndian.getUShort(pe, 6); long createdAt = LittleEndian.getInt(pe, 8); long symbolTableOffset = LittleEndian.getInt(pe, 12); long numSymbols = LittleEndian.getInt(pe, 16); int sizeOptHdrs = LittleEndian.getUShort(pe, 20); int characteristcs = LittleEndian.getUShort(pe, 22); // Turn this into helpful metadata Date createdAtD = new Date(createdAt * 1000l); metadata.set(Metadata.CREATION_DATE, createdAtD); switch (machine) { case 0x14c: metadata.set(MACHINE_TYPE, MACHINE_x86_32); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x8664: metadata.set(MACHINE_TYPE, MACHINE_x86_32); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "64"); break; case 0x200: metadata.set(MACHINE_TYPE, MACHINE_IA_64); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "64"); break; case 
0x184: metadata.set(MACHINE_TYPE, MACHINE_ALPHA); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x284: metadata.set(MACHINE_TYPE, MACHINE_ALPHA); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "64"); break; case 0x1c0: case 0x1c4: metadata.set(MACHINE_TYPE, MACHINE_ARM); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x268: metadata.set(MACHINE_TYPE, MACHINE_M68K); metadata.set(ENDIAN, Endian.BIG.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x266: case 0x366: case 0x466: metadata.set(MACHINE_TYPE, MACHINE_MIPS); metadata.set(ENDIAN, Endian.BIG.getName()); metadata.set(ARCHITECTURE_BITS, "16"); break; case 0x162: case 0x166: case 0x168: case 0x169: metadata.set(MACHINE_TYPE, MACHINE_MIPS); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "16"); break; case 0x1f0: case 0x1f1: metadata.set(MACHINE_TYPE, MACHINE_PPC); metadata.set(ENDIAN, Endian.LITTLE.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x1a2: case 0x1a3: metadata.set(MACHINE_TYPE, MACHINE_SH3); metadata.set(ENDIAN, Endian.BIG.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x1a6: metadata.set(MACHINE_TYPE, MACHINE_SH4); metadata.set(ENDIAN, Endian.BIG.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x1a8: metadata.set(MACHINE_TYPE, MACHINE_SH3); metadata.set(ENDIAN, Endian.BIG.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0x9041: metadata.set(MACHINE_TYPE, MACHINE_M32R); metadata.set(ENDIAN, Endian.BIG.getName()); metadata.set(ARCHITECTURE_BITS, "32"); break; case 0xebc: metadata.set(MACHINE_TYPE, MACHINE_EFI); break; default: metadata.set(MACHINE_TYPE, MACHINE_UNKNOWN); break; } }