List of usage examples for the org.apache.poi.poifs.filesystem.POIFSFileSystem constructor POIFSFileSystem(InputStream)
public POIFSFileSystem(InputStream stream) throws IOException
From source file:RefDiviedMain.java
License:Creative Commons License
public static Element getTable(String name) { DataFormatter formatter = new DataFormatter(Locale.US); if (name == null) { DocumentBuilder db = null; try {//from w w w . ja v a 2s . co m db = dbf.newDocumentBuilder(); } catch (ParserConfigurationException ex) { ta.append("\nerrors happen:\n"); ta.append(ex.getMessage() + "\n"); } doc = db.newDocument(); } if (name == null) { name = "C:\\Users\\DLiu1\\Documents\\NetBeansProjects\\Simon\\dist\\Table 1"; } String fileName = name + ".xls"; File aaa = new File(fileName); if (!aaa.exists()) { RefDiviedMain .error(fileName + " doesn't exist, please copy the " + fileName + " into the same folder!"); return null; } Element tableFrame = null; try { tableFrame = doc.createElement("table"); tableFrame.setAttribute("frame", "hsides"); tableFrame.setAttribute("rules", "groups"); Element thead = doc.createElement("thead"); Element tbody = doc.createElement("tbody"); tableFrame.appendChild(thead); tableFrame.appendChild(tbody); /** Creating Input Stream**/ //InputStream myInput= ReadExcelFile.class.getResourceAsStream( fileName ); FileInputStream myInput = new FileInputStream(aaa); /** Create a POIFSFileSystem object**/ POIFSFileSystem myFileSystem = new POIFSFileSystem(myInput); /** Create a workbook using the File System**/ HSSFWorkbook myWorkBook = new HSSFWorkbook(myFileSystem); /** Get the first sheet from workbook**/ HSSFSheet mySheet = myWorkBook.getSheetAt(0); /** We now need something to iterate through the cells.**/ Iterator rowIter = mySheet.rowIterator(); int theRow = 0; int theadRows = 1; while (rowIter.hasNext()) { theRow++; HSSFRow myRow = (HSSFRow) rowIter.next(); Iterator cellIter = myRow.cellIterator(); //Vector cellStoreVector=new Vector(); System.out.println("\nprinting " + theRow); Element tr = doc.createElement("tr"); System.out.println("\nprinting " + theRow); while (cellIter.hasNext()) { HSSFCell myCell = (HSSFCell) cellIter.next(); CellProperties cp = new CellProperties(myCell); Element td = null; int 
colspan = cp.getColspan(); int rowspan = cp.getRowspan(); CellReference ref = new CellReference(myCell); System.out.println( "The value of " + ref.formatAsString() + " is " + formatter.formatCellValue(myCell)); // String myCellValue = myCell.toString(); // myCell.setCellType(Cell.CELL_TYPE_STRING); // String myCellValue = myCell.getRichStringCellValue().toString(); String myCellValue = formatter.formatCellValue(myCell); if (myCellValue != null && myCellValue.trim().endsWith(".0")) { System.out.println(myCellValue + " have 0"); myCellValue = myCellValue.replace(".0", ""); } System.out .println(myCellValue + ": colspan:" + cp.getColspan() + " rowspan:" + cp.getRowspan()); if (rowspan > 1) { if (theRow == 1) { theadRows = rowspan; } } if (theRow <= theadRows) { td = doc.createElement("th"); td.setAttribute("align", "left"); } else { td = doc.createElement("td"); td.setAttribute("align", "left"); td.setAttribute("valign", "top"); } if (colspan > 1) { td.setAttribute("colspan", colspan + ""); } if (rowspan > 1) { td.setAttribute("rowspan", rowspan + ""); } if ((colspan > 1 || rowspan > 1) && myCellValue.trim().equals("")) { continue; } Element bold = doc.createElement("bold"); tr.appendChild(td); td.appendChild(doc.createTextNode(myCellValue.trim())); // cellStoreVector.addElement(myCell); } if (theRow <= theadRows) { thead.appendChild(tr); } else { tbody.appendChild(tr); } // cellVectorHolder.addElement(cellStoreVector); } } catch (Exception e) { ta.append("\nerrors happen:\n"); ta.append(e.getMessage() + "\n"); } return tableFrame; }
From source file:RefDiviedMain.java
License:Creative Commons License
/**
 * Reads the Word document at {@code fileName} and distributes its lines over
 * the static manuscript fields of this class.
 *
 * <p>The file is opened through {@code POIFSFileSystem}/{@code HWPFDocument};
 * {@code Main.readParagraphs} is then handed {@code ta} and {@code refs}
 * (presumably it fills {@code refs} with one entry per paragraph - confirm
 * against that method). The entries of {@code refs} are walked in order:
 * <ul>
 *   <li>the first entry of at least 3 characters is stored in
 *       {@code isOriginal}, the second in {@code title};</li>
 *   <li>single-line fields are recognised by prefix ({@code doi:},
 *       {@code volume:}, {@code epub:} ...) and, for most of them, checked
 *       against a regular expression;</li>
 *   <li>section markers ({@code [a]}, {@code Abstract}, {@code [s1]Keywords},
 *       {@code [body]}, {@code Acknowledgement}, {@code [back]},
 *       {@code Table}, {@code Figure legends}, {@code References}) start an
 *       inner scan that collects the following entries into the matching
 *       list until the next marker.</li>
 * </ul>
 * Entries are trimmed in place (written back into {@code refs}) as they are
 * visited; entries shorter than 3 characters are skipped.
 *
 * <p>Exceptions are not propagated: both {@code catch} blocks append the
 * exception message to {@code ta}.
 *
 * @param fileName path of the Word document to read
 */
public static void readMyDocument(String fileName) {
    POIFSFileSystem fs = null;
    try {
        fs = new POIFSFileSystem(new FileInputStream(fileName));
        HWPFDocument doc = new HWPFDocument(fs);

        // Read the content
        Main.readParagraphs(doc, RefDiviedMain.ta, RefDiviedMain.refs);

        // Counts only the entries that survive the "at least 3 characters" filter;
        // positions 1 and 2 have a fixed meaning (see below).
        int realIndex = 0;
        try {
            for (int i = 0; i < refs.size(); i++) {
                String temp = refs.get(i).trim();
                System.out.println(i + ":" + temp);
                refs.set(i, temp);
                // NOTE(review): temp is the result of trim(), so the null test can
                // never be true; a null entry would already have thrown above.
                if (temp == null || temp.length() < 3) {
                    continue;
                }
                realIndex++;
                ta.append("\n" + "dealing with the " + i + " ref:" + refs.get(i));

                // First meaningful entry.
                if (realIndex == 1) {
                    RefDiviedMain.isOriginal = temp;
                    continue;
                }
                // Second meaningful entry is the title.
                if (realIndex == 2) {
                    title = (temp == null ? "Unknown Title" : temp);
                    continue;
                }

                // ---- author block: "[a]" ... "[/a]" ----
                if (temp.startsWith("[a]")) {
                    // NOTE(review): realIndex 1 and 2 both `continue` above, so this
                    // condition looks unreachable - confirm intent.
                    if (realIndex < 3) {
                        RefDiviedMain.error("author tag is located at wrong place.");
                    }
                    if (temp.equals("[a]")) {
                        // The scan starts at the "[a]" entry itself, and "[a]" is
                        // 3 characters long, so it passes the length filter and is
                        // added to authors as well.
                        // NOTE(review): no bounds check - a missing "[/a]" makes
                        // refs.get(j) run past the end; the outer catch reports it.
                        int j = i;
                        while (!refs.get(j).equals("[/a]")) {
                            temp = refs.get(j).trim();
                            refs.set(j, temp);
                            if (temp == null || temp.length() < 3) {
                                j++;
                                continue;
                            }
                            authors.add(refs.get(j));
                            j++;
                        }
                        // j is on "[/a]"; the loop's i++ then steps past it.
                        i = j;
                        continue;
                    } else {
                        // After reporting, processing falls through to the checks below.
                        RefDiviedMain.error("[a] must not be followed by anything.\n" + temp);
                    }
                }

                // Three slash-separated numbers with optional surrounding whitespace;
                // shared by the date fields below. Compiled once per loop iteration.
                Pattern p = Pattern.compile("^\\s{0,}\\d+/\\d+/\\d+\\s{0,}$");

                // ---- single-line fields ----
                // Pattern for each: strip the "name:" prefix, store the remainder,
                // validate it; on a match move on to the next entry, otherwise
                // report and fall through to the remaining checks.
                if (temp.startsWith("Manuscript accepted")) {
                    manuscriptDateAccepted = temp.replace("Manuscript accepted:", "");
                    Matcher m = p.matcher(manuscriptDateAccepted);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error(
                                "manuscriptDateAccepted must follow by data format:dd/dd/dddd.\n" + temp);
                    }
                }
                if (temp.startsWith("Short title")) {
                    shortTitle = temp.replace("Short title:", "");
                    continue;
                }
                if (temp.startsWith("doi:")) {
                    doi = temp.replace("doi:", "");
                    continue;
                }
                if (temp.startsWith("ppub:")) {
                    ppub = temp.replace("ppub:", "");
                    // NOTE(review): this pattern accepts two slash-separated numbers,
                    // while the error text below asks for dd/dd/dddd - one of the two
                    // is presumably wrong.
                    Pattern ppub_p = Pattern.compile("^\\s{0,}\\d+/\\d+\\s{0,}$");
                    Matcher m = ppub_p.matcher(ppub);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("ppub data format must be:dd/dd/dddd.\n" + temp);
                    }
                }
                if (temp.startsWith("epub:")) {
                    epub = temp.replace("epub:", "");
                    Matcher m = p.matcher(epub);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("epub data format must be:dd/dd/dddd.\n" + temp);
                    }
                }
                // The next four fields must consist of digits only
                // (optional surrounding whitespace).
                if (temp.startsWith("volume:")) {
                    volume = temp.replace("volume:", "");
                    Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$");
                    Matcher m = pc.matcher(volume);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("must only be digit number:\n" + temp);
                    }
                }
                if (temp.startsWith("issue")) {
                    issue = temp.replace("issue:", "");
                    Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$");
                    Matcher m = pc.matcher(issue);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("must only be digit number:\n" + temp);
                    }
                }
                if (temp.startsWith("fpage")) {
                    fpage = temp.replace("fpage:", "");
                    Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$");
                    Matcher m = pc.matcher(fpage);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("must only be digit number:\n" + temp);
                    }
                }
                if (temp.startsWith("lpage")) {
                    lpage = temp.replace("lpage:", "");
                    Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$");
                    Matcher m = pc.matcher(lpage);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("must only be digit number:\n" + temp);
                    }
                }
                if (temp.startsWith("date accepted")) {
                    dateAccepted = temp.replace("date accepted:", "");
                    Matcher m = p.matcher(dateAccepted);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("data format must be:dd/dd/dddd.\n" + temp);
                    }
                }
                if (temp.startsWith("copyright-statement")) {
                    copyrightStat = temp.replace("copyright-statement:", "");
                    continue;
                }
                if (temp.startsWith("copyright-year")) {
                    copyrightYear = temp.replace("copyright-year:", "");
                    // Exactly four digits.
                    Pattern pc = Pattern.compile("^\\s{0,}\\d{4}\\s{0,}$");
                    Matcher m = pc.matcher(copyrightYear);
                    if (m.find()) {
                        continue;
                    } else {
                        RefDiviedMain.error("must only be 4 digit numbers:\n" + temp);
                    }
                }

                // ---- multi-line sections ----
                // Each section scans forward with j until the next marker, then sets
                // i = j - 1 so that the loop's i++ lands on that marker and the next
                // iteration handles it. Scans that start at j = i also collect the
                // marker entry itself; scans that start at i + 1 do not.
                // NOTE(review): none of these scans checks j against refs.size();
                // a missing terminator ends in an exception handled by the catch below.

                // Abstract: everything up to "[s1]Keywords".
                if (temp.equalsIgnoreCase("Abstract")) {
                    int j = i;
                    while (!refs.get(j).trim().equals("[s1]Keywords")) {
                        ta.append("try find out introduction " + refs.get(j) + "\n");
                        temp = refs.get(j).trim();
                        refs.set(j, temp);
                        if (temp == null || temp.length() < 3) {
                            j++;
                            continue;
                        }
                        abstractArr.add(refs.get(j));
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // Keywords: everything up to "[body]".
                if (temp.trim().equalsIgnoreCase("[s1]Keywords")) {
                    int j = i;
                    while (!refs.get(j).trim().equals("[body]")) {
                        ta.append("try find out introduction " + refs.get(j) + "\n");
                        temp = refs.get(j).trim();
                        refs.set(j, temp);
                        if (temp == null || temp.length() < 3) {
                            j++;
                            continue;
                        }
                        keywordArr.add(refs.get(j));
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // Body: everything up to "Acknowledgement" or "[back]".
                // The terminator comparison here is made on the not-yet-trimmed entry.
                if (temp.equals("[body]")) {
                    RefDiviedMain.isValidBoday = true;
                    int j = i;
                    while (!refs.get(j).equals("Acknowledgement") && !refs.get(j).equals("[back]")) {
                        ta.append("try find out introduction " + refs.get(j) + "\n");
                        temp = refs.get(j).trim();
                        refs.set(j, temp);
                        if (temp == null || temp.length() < 3) {
                            j++;
                            continue;
                        }
                        RefDiviedMain.discussion.add(refs.get(j));
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // Acknowledgement: entries after the heading, up to "[back]".
                if (temp.equals("Acknowledgement")) {
                    int j = i + 1;
                    while (!refs.get(j).equals("[back]")) {
                        temp = refs.get(j).trim();
                        refs.set(j, temp);
                        if (temp == null || temp.length() < 3) {
                            j++;
                            continue;
                        }
                        RefDiviedMain.acknowledgement.add(refs.get(j));
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // Back matter: up to "Table", "Figure legends" or "References".
                if (temp.equals("[back]")) {
                    RefDiviedMain.isValidBack = true;
                    int j = i;
                    while (!refs.get(j).equals("Table") && !refs.get(j).equals("Figure legends")
                            && !refs.get(j).equals("References")) {
                        temp = refs.get(j).trim();
                        refs.set(j, temp);
                        if (temp == null || temp.length() < 3) {
                            j++;
                            continue;
                        }
                        RefDiviedMain.disclosure.add(refs.get(j));
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // Tables: each entry is "<a[0]>[title]<title>" optionally followed by
                // "[footnotes]<notes>"; stored as a 3-element list {a[0], b[0], b[1]}.
                if (temp.equalsIgnoreCase("Table")) {
                    int j = i + 1;
                    while (!refs.get(j).startsWith("Figure legends") && !refs.get(j).equals("References")) {
                        List<String> aTable = new ArrayList<String>();
                        refs.set(j, refs.get(j).trim());
                        String tempTemp = refs.get(j).trim();
                        System.out.println("setup table:" + tempTemp);
                        if (refs.get(j) == null || tempTemp.length() < 3) {
                            j++;
                            continue;
                        }
                        // Reported but not fatal: execution continues and a[1] below
                        // will fail if the marker is really missing.
                        if (!tempTemp.contains("[title]")) {
                            RefDiviedMain.error("problem happened around " + tempTemp);
                            //System.exit(-1);
                        }
                        String[] a = refs.get(j).split("\\[title\\]");
                        aTable.add(a[0]);
                        String[] b = new String[2];
                        if (a[1].contains("footnotes")) {
                            // NOTE(review): the test looks for "footnotes" but the split
                            // uses "[footnotes]"; if the split yields fewer than two
                            // parts, b[1] below is out of range.
                            b = a[1].split("\\[footnotes\\]");
                            j++;
                            // Following entries are appended to the footnote text,
                            // joined with the literal separator "aaaaa".
                            while (!refs.get(j).startsWith("Figure legends") && !refs.get(j).equals("References")
                                    && !refs.get(j).startsWith("Table")) {
                                b[1] += "aaaaa" + refs.get(j);
                                j++;
                            }
                            // Step back so the j++ at the end of the outer loop body
                            // lands on the entry that stopped the footnote scan.
                            j--;
                        } else {
                            b[0] = a[1];
                            b[1] = "";
                        }
                        aTable.add(b[0]);
                        aTable.add(b[1]);
                        RefDiviedMain.table.add(aTable);
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // Figure legends: each entry is "<a[0]>[legend]<b[0]>[file]<b[1]>".
                if (temp.equals("Figure legends")) {
                    int j = i + 1;
                    while (!refs.get(j).startsWith("References")) {
                        List<String> aTable = new ArrayList<String>();
                        refs.set(j, refs.get(j).trim());
                        String tempTemp = refs.get(j).trim();
                        if (tempTemp == null || tempTemp.length() < 3) {
                            j++;
                            continue;
                        }
                        if (!tempTemp.contains("[legend]")) {
                            RefDiviedMain.error("problem happened around " + tempTemp);
                            // System.exit(-1);
                        }
                        String[] a = refs.get(j).split("\\[legend\\]");
                        aTable.add(a[0]);
                        String[] b = a[1].split("\\[file\\]");
                        aTable.add(b[0]);
                        aTable.add(b[1]);
                        RefDiviedMain.figure.add(aTable);
                        j++;
                    }
                    i = j - 1;
                    continue;
                }
                // References: consumes every remaining entry. Each reference gets the
                // literal marker "httphttp" appended, followed by up to two directly
                // following entries that start with "http://" or "Http://".
                if (temp.equals("References")) {
                    RefDiviedMain.isValidRefs = true;
                    i++;
                    while (i < refs.size()) {
                        System.out.println("adding reference before:" + refs.get(i));
                        String result = refs.get(i);
                        result += "httphttp";
                        String secondString = null;
                        if (i + 1 < refs.size()) {
                            secondString = refs.get(i + 1);
                        }
                        System.out.println("new string 1:" + secondString);
                        String thirdString = null;
                        if (i + 2 < refs.size()) {
                            thirdString = refs.get(i + 2);
                        }
                        System.out.println("new string 2:" + thirdString);
                        if (secondString != null && (secondString.trim().startsWith("http://")
                                || secondString.trim().startsWith("Http://"))) {
                            result += secondString.trim();
                            System.out.println("adding second string:" + result);
                            i++;
                            // A third URL line is only considered when the second was one.
                            if (thirdString != null && (thirdString.trim().startsWith("http://")
                                    || thirdString.trim().startsWith("Http://"))) {
                                result += thirdString.trim();
                                System.out.println("adding third string:" + result);
                                i++;
                            }
                        }
                        // Known URL prefixes are replaced by short markers.
                        // NOTE(review): replaceAll takes a regular expression, so each
                        // '.' in these prefixes matches any character, not only a dot.
                        result = result.replaceAll("http://dx.doi.org/", "aaaaadoi");
                        result = result.replaceAll("Http://dx.doi.org/", "aaaaadoi");
                        result = result.replaceAll("http://www.ncbi.nlm.nih.gov/pubmed/", "aaaaapmid");
                        result = result.replaceAll("Http://www.ncbi.nlm.nih.gov/pubmed/", "aaaaapmid");
                        System.out.println("adding reference after:" + result);
                        RefDiviedMain.references.add(result);
                        i++;
                    }
                }
                //doit(refs.get(i), i);
            }
        } catch (Exception e) {
            // Any failure while walking refs stops the walk; only the message is shown.
            ta.append("\nerrors happen:\n");
            ta.append(e.getMessage() + "\n");
        }
        // NOTE(review): pageNumber is only referenced by the commented-out calls below.
        int pageNumber = 1;
        // We will try reading the header for page 1
        //readHeader(doc, pageNumber);
        // Let's try reading the footer for page 1
        //readFooter(doc, pageNumber);
        // Read the document summary
        //readDocumentSummary(doc);
    } catch (Exception e) {
        // Failure to open or read the document itself.
        ta.append(e.getMessage());
    }
}
From source file:XLS2CSVmra.java
License:Apache License
/**
 * Creates a new XLS -> CSV converter.
 *
 * <p>Opens {@code filename}, wraps it in a {@link POIFSFileSystem} and
 * delegates to the three-argument constructor, passing {@code System.out}
 * as its second argument.
 *
 * @param filename The file to process
 * @param minColumns The minimum number of columns to output, or -1 for no minimum
 * @throws IOException if the {@code POIFSFileSystem} cannot be built from the file's content
 * @throws FileNotFoundException if {@code filename} cannot be opened for reading
 */
public XLS2CSVmra(String filename, int minColumns) throws IOException, FileNotFoundException {
    this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns);
}
From source file:NewEmptyJUnitTest.java
/** * Test that we can get data from two different * embeded word documents// w w w. j av a2s . c om * @throws Exception */ public void testExtractFromEmbeded() throws Exception { POIFSFileSystem fs = new POIFSFileSystem( POIDataSamples.getSpreadSheetInstance().openResourceAsStream(filename3)); HWPFDocument doc; WordExtractor extractor3; DirectoryNode dirA = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B7"); DirectoryNode dirB = (DirectoryNode) fs.getRoot().getEntry("MBD0000A3B2"); // Should have WordDocument and 1Table assertNotNull(dirA.getEntry("1Table")); assertNotNull(dirA.getEntry("WordDocument")); assertNotNull(dirB.getEntry("1Table")); assertNotNull(dirB.getEntry("WordDocument")); // Check each in turn doc = new HWPFDocument(dirA, fs); extractor3 = new WordExtractor(doc); assertNotNull(extractor3.getText()); assertTrue(extractor3.getText().length() > 20); assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n", extractor3.getText()); assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle()); assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject()); doc = new HWPFDocument(dirB, fs); extractor3 = new WordExtractor(doc); assertNotNull(extractor3.getText()); assertTrue(extractor3.getText().length() > 20); assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n", extractor3.getText()); assertEquals("Sample Doc 2", extractor3.getSummaryInformation().getTitle()); assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject()); }
From source file:NewEmptyJUnitTest.java
/** * Tests that we can work with both {@link POIFSFileSystem} * and {@link NPOIFSFileSystem}/*from w ww. java2s . co m*/ */ public void testDifferentPOIFS() throws Exception { POIDataSamples docTests = POIDataSamples.getDocumentInstance(); // Open the two filesystems DirectoryNode[] files = new DirectoryNode[2]; files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot(); NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(docTests.getFile("test2.doc")); files[1] = npoifsFileSystem.getRoot(); // Open directly for (DirectoryNode dir : files) { WordExtractor extractor = new WordExtractor(dir); assertEquals(p_text1_block, extractor.getText()); } // Open via a HWPFDocument for (DirectoryNode dir : files) { HWPFDocument doc = new HWPFDocument(dir); WordExtractor extractor = new WordExtractor(doc); assertEquals(p_text1_block, extractor.getText()); } npoifsFileSystem.close(); }
From source file:NewEmptyJUnitTest.java
/**
 * [RESOLVED FIXED] Bug 51686 - Update to POI 3.8 beta 4 causes
 * ConcurrentModificationException in Tika's OfficeParser.
 *
 * <p>Creates a {@code WordExtractor} on the file system while the root
 * directory's entries are being iterated, and expects text to come back.
 */
public void testBug51686() throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(
            POIDataSamples.getDocumentInstance().openResourceAsStream("Bug51686.doc"));

    String extracted = null;
    for (Entry candidate : fs.getRoot()) {
        if (!"WordDocument".equals(candidate.getName())) {
            continue;
        }
        // The extractor is deliberately built inside the iteration.
        WordExtractor wordExtractor = new WordExtractor(fs);
        try {
            extracted = wordExtractor.getText();
        } finally {
            wordExtractor.close();
        }
    }

    assertNotNull(extracted);
}
From source file:NewEmptyJUnitTest.java
public void testExtractorFromWord6Extractor() throws Exception { POIFSFileSystem fs = new POIFSFileSystem( POIDataSamples.getHPSFInstance().openResourceAsStream("TestMickey.doc")); Word6Extractor wExt = new Word6Extractor(fs); try {/* w w w .j av a 2 s .c om*/ POITextExtractor ext = wExt.getMetadataTextExtractor(); try { // Now overall String text = ext.getText(); assertTrue(text.indexOf("TEMPLATE = Normal") > -1); assertTrue(text.indexOf("SUBJECT = sample subject") > -1); assertTrue(text.indexOf("MANAGER = sample manager") > -1); assertTrue(text.indexOf("COMPANY = sample company") > -1); } finally { ext.close(); } } finally { wExt.close(); } }
From source file:XLS2CSV.java
License:Apache License
/**
 * Creates a converter that reads the workbook at {@code inputFilePath} and
 * prints to the file at {@code outputFilePath} using {@code OUTPUT_CHARSET}.
 * No minimum column count is applied ({@code minColumns} is set to -1).
 *
 * @param inputFilePath  path of the file to read
 * @param outputFilePath path of the file the output stream writes to
 * @throws Exception if the input cannot be opened or parsed, or the output
 *                   file cannot be created
 */
public XLS2CSV(String inputFilePath, String outputFilePath) throws Exception {
    // The input is opened first, so a missing input never touches the output file.
    FileInputStream workbookStream = new FileInputStream(inputFilePath);
    this.fs = new POIFSFileSystem(workbookStream);
    this.output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
    this.minColumns = -1;
}
From source file:XLS2CSV.java
License:Apache License
/**
 * Creates a new XLS -> CSV converter.
 *
 * <p>Opens {@code filename}, wraps it in a {@link POIFSFileSystem} and
 * delegates to the three-argument constructor, passing {@code System.out}
 * as its second argument.
 *
 * @param filename
 *            The file to process
 * @param minColumns
 *            The minimum number of columns to output, or -1 for no minimum
 * @throws IOException
 *             if the {@code POIFSFileSystem} cannot be built from the file's content
 * @throws FileNotFoundException
 *             if {@code filename} cannot be opened for reading
 */
public XLS2CSV(String filename, int minColumns) throws IOException, FileNotFoundException {
    this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns);
}
From source file:RefSouceOnlyMain.java
License:Creative Commons License
public static Element getTable(String name) { DataFormatter formatter = new DataFormatter(Locale.US); if (name == null) { DocumentBuilder db = null; try {/*ww w . j av a2 s .c om*/ db = dbf.newDocumentBuilder(); } catch (ParserConfigurationException ex) { ta.append("\nerrors happen:\n"); ta.append(ex.getMessage() + "\n"); } doc = db.newDocument(); } if (name == null) { name = "C:\\Users\\DLiu1\\Documents\\NetBeansProjects\\Simon\\dist\\Table 1"; } String fileName = name + ".xls"; File aaa = new File(fileName); if (!aaa.exists()) { RefSouceOnlyMain .error(fileName + " doesn't exist, please copy the " + fileName + " into the same folder!"); return null; } Element tableFrame = null; try { tableFrame = doc.createElement("table"); tableFrame.setAttribute("frame", "hsides"); tableFrame.setAttribute("rules", "groups"); Element thead = doc.createElement("thead"); Element tbody = doc.createElement("tbody"); tableFrame.appendChild(thead); tableFrame.appendChild(tbody); /** Creating Input Stream**/ //InputStream myInput= ReadExcelFile.class.getResourceAsStream( fileName ); FileInputStream myInput = new FileInputStream(aaa); /** Create a POIFSFileSystem object**/ POIFSFileSystem myFileSystem = new POIFSFileSystem(myInput); /** Create a workbook using the File System**/ HSSFWorkbook myWorkBook = new HSSFWorkbook(myFileSystem); /** Get the first sheet from workbook**/ HSSFSheet mySheet = myWorkBook.getSheetAt(0); /** We now need something to iterate through the cells.**/ Iterator rowIter = mySheet.rowIterator(); int theRow = 0; int theadRows = 1; while (rowIter.hasNext()) { theRow++; HSSFRow myRow = (HSSFRow) rowIter.next(); Iterator cellIter = myRow.cellIterator(); //Vector cellStoreVector=new Vector(); System.out.println("\nprinting " + theRow); Element tr = doc.createElement("tr"); System.out.println("\nprinting " + theRow); while (cellIter.hasNext()) { HSSFCell myCell = (HSSFCell) cellIter.next(); CellProperties cp = new CellProperties(myCell); Element td = null; int colspan 
= cp.getColspan(); int rowspan = cp.getRowspan(); CellReference ref = new CellReference(myCell); System.out.println( "The value of " + ref.formatAsString() + " is " + formatter.formatCellValue(myCell)); // String myCellValue = myCell.toString(); // myCell.setCellType(Cell.CELL_TYPE_STRING); // String myCellValue = myCell.getRichStringCellValue().toString(); String myCellValue = formatter.formatCellValue(myCell); if (myCellValue != null && myCellValue.trim().endsWith(".0")) { System.out.println(myCellValue + " have 0"); myCellValue = myCellValue.replace(".0", ""); } System.out .println(myCellValue + ": colspan:" + cp.getColspan() + " rowspan:" + cp.getRowspan()); if (rowspan > 1) { if (theRow == 1) { theadRows = rowspan; } } if (theRow <= theadRows) { td = doc.createElement("th"); td.setAttribute("align", "left"); } else { td = doc.createElement("td"); td.setAttribute("align", "left"); td.setAttribute("valign", "top"); } if (colspan > 1) { td.setAttribute("colspan", colspan + ""); } if (rowspan > 1) { td.setAttribute("rowspan", rowspan + ""); } if ((colspan > 1 || rowspan > 1) && myCellValue.trim().equals("")) { continue; } Element bold = doc.createElement("bold"); tr.appendChild(td); td.appendChild(doc.createTextNode(myCellValue.trim())); // cellStoreVector.addElement(myCell); } if (theRow <= theadRows) { thead.appendChild(tr); } else { tbody.appendChild(tr); } // cellVectorHolder.addElement(cellStoreVector); } } catch (Exception e) { ta.append("\nerrors happen:\n"); ta.append(e.getMessage() + "\n"); } return tableFrame; }