// Java tutorial
/**
 *
 * Copyright 2009-2013 The MITRE Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * **************************************************************************
 * NOTICE
 * This software was produced for the U. S. Government under Contract No.
 * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
 * Software and Noncommercial Computer Software Documentation Clause
 * 252.227-7014 (JUN 1995)
 *
 * (c) 2012 The MITRE Corporation. All Rights Reserved.
 * **************************************************************************
 */
package org.opensextant.xtext.collectors;

import java.io.*;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.zip.*;
import org.apache.commons.compress.archivers.tar.*;
import org.apache.commons.compress.utils.IOUtils;
import org.opensextant.ConfigException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.opensextant.xtext.ExclusionFilter;
import org.opensextant.xtext.Converter;

/**
 * Walks a ZIP, TAR or gzipped-TAR archive. Every entry that is not a directory
 * and is not rejected by the {@link ExclusionFilter} is saved beneath the
 * working folder and then handed to the {@link Converter}.
 *
 * @author Marc C. Ubaldino, MITRE, ubaldino at mitre dot org
 */
public class ArchiveNavigator implements Collector {

    private static final Logger log = LoggerFactory.getLogger(ArchiveNavigator.class);

    private final File saveDir;
    private final ExclusionFilter filter;
    private final Converter converter;
    private final File currentArchive;

    /**
     * When false (the default) an entry whose target file already exists is not
     * written again; the existing file is passed to the converter instead.
     */
    public boolean overwrite = false;

    /**
     * Given a working temp folder and a file filter, unpack archives. The
     * working dir, saveTo, is not created by this constructor.
     *
     * @param inputFile  input archive
     * @param saveTo     output dir where entries are saved
     * @param fileFilter file extension filter
     * @param fileConv   conversion resource, e.g. instance of XText
     * @throws IOException if the filter or the converter is null
     */
    public ArchiveNavigator(File inputFile, String saveTo, ExclusionFilter fileFilter, Converter fileConv)
            throws IOException {
        this.saveDir = new File(saveTo);
        this.filter = fileFilter;
        this.converter = fileConv;
        if (fileFilter == null || fileConv == null) {
            throw new IOException("Filter and converter cannot be null -- XText is the default for both.");
        }
        this.currentArchive = inputFile;
    }

    /**
     * @return absolute path of the folder given as saveTo at construction
     */
    public String getWorkingDir() {
        return saveDir.getAbsolutePath();
    }

    /**
     * Unpack the archive given at construction, choosing the unpacker by file
     * extension: "zip", "tar", or "gz"/"tgz" (gunzipped first, then treated as
     * a TAR). Each saved entry is passed to the converter.
     *
     * @throws ConfigException declared by the unpack/convert steps this method calls
     * @throws IOException if archive had I/O issues or is an unsupported type of archive
     */
    @Override
    public void collect() throws IOException, ConfigException {

        String ext = FilenameUtils.getExtension(currentArchive.getPath());
        File archivetmp;

        if (ext.equalsIgnoreCase("zip")) {
            archivetmp = unzip(currentArchive);
        } else if (ext.equalsIgnoreCase("tar")) {
            archivetmp = untar(currentArchive);
        } else if (ext.equalsIgnoreCase("gz") || ext.equalsIgnoreCase("tgz")) {
            // getExtension() yields only the text after the last dot, so a
            // "name.tar.gz" file arrives here with ext "gz".
            String basename = FilenameUtils.getBaseName(currentArchive.getName());

            // We assume the file is a tarball. First unzip it, then untar it.
            File tarFile = gunzipAsTAR(currentArchive, basename);
            archivetmp = untar(tarFile);
        } else {
            throw new IOException("Unsupported archive type: EXT=" + ext);
        }

        log.info("Archive FILE={} has been processed to DIR={}", currentArchive, archivetmp);
    }

    /**
     * Un-ZIP. Same approach as {@link #untar(File)}, except that entries are
     * saved directly beneath the working dir rather than in a per-archive
     * sub-folder. A failure to save a single entry is logged and the traversal
     * continues with the next entry.
     *
     * @param zipFile ZIP archive to traverse
     * @return the folder the entries were saved to
     * @throws IOException if the archive cannot be opened or read
     * @throws ConfigException declared for the conversion step
     */
    public File unzip(File zipFile) throws IOException, ConfigException {

        File workingDir = saveDir;

        InputStream input = new BufferedInputStream(new FileInputStream(zipFile));
        ZipArchiveInputStream in = null;
        try {
            in = (ZipArchiveInputStream) (new ArchiveStreamFactory().createArchiveInputStream("zip", input));

            ZipArchiveEntry zipEntry;
            while ((zipEntry = (ZipArchiveEntry) in.getNextEntry()) != null) {
                if (filterEntry(zipEntry)) {
                    continue;
                }

                try {
                    File tmpFile = saveArchiveEntry(zipEntry, in, workingDir);
                    converter.convert(tmpFile);
                } catch (IOException err) {
                    log.error("Unable to save item, FILE=" + zipFile.getName() + "!" + zipEntry.getName(), err);
                }
            }
            return workingDir;
        } catch (ArchiveException ae) {
            throw new IOException(ae);
        } finally {
            // The archive stream may never have been created; in that case the
            // raw file stream still has to be released.
            if (in != null) {
                in.close();
            } else {
                input.close();
            }
        }
    }

    /**
     * Decompress a gzip file into "&lt;working dir&gt;/&lt;fname&gt;.tar".
     *
     * @param theFile gzip-compressed input
     * @param fname   base name for the output file; ".tar" is appended
     * @return TAR file path for result.
     * @throws IOException on I/O failure
     */
    private File gunzipAsTAR(File theFile, String fname) throws IOException {

        InputStream rawInput = new FileInputStream(theFile);
        GZIPInputStream gzipInputStream = null;
        OutputStream out = null;

        try {
            gzipInputStream = new GZIPInputStream(rawInput);

            // TODO: more testing on this particular case: gunzip *.gz *.tgz *.tar.gz -- a mix of tar and gunzip
            File outFile = new File(getWorkingDir(), fname + ".tar");
            out = new BufferedOutputStream(new FileOutputStream(outFile));

            byte[] buf = new byte[1024];
            int len;
            while ((len = gzipInputStream.read(buf)) != -1) {
                out.write(buf, 0, len);
            }
            return outFile;
        } finally {
            try {
                // GZIPInputStream's constructor throws on non-gzip data, which
                // leaves only the raw stream to close.
                if (gzipInputStream != null) {
                    gzipInputStream.close();
                } else {
                    rawInput.close();
                }
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }
    }

    /**
     * Un-TAR. Entries are saved to a sub-folder of the working dir named after
     * the archive's base name, then each saved file is passed to the file
     * converter. A failure to save a single entry is logged and the traversal
     * continues with the next entry.
     *
     * @param tarFile TAR archive to traverse
     * @return the folder the entries were saved to
     * @throws IOException if the target folder name is invalid or the archive cannot be opened or read
     * @throws ConfigException declared for the conversion step
     */
    public File untar(File tarFile) throws IOException, ConfigException {

        String _working = FilenameUtils.concat(getWorkingDir(), FilenameUtils.getBaseName(tarFile.getPath()));
        if (_working == null) {
            throw new IOException("Invalid archive path for " + tarFile.getPath());
        }
        File workingDir = new File(_working);
        workingDir.mkdir();

        InputStream input = new BufferedInputStream(new FileInputStream(tarFile));
        TarArchiveInputStream in = null;
        try {
            in = (TarArchiveInputStream) (new ArchiveStreamFactory().createArchiveInputStream("tar", input));

            TarArchiveEntry tarEntry;
            while ((tarEntry = (TarArchiveEntry) in.getNextEntry()) != null) {
                if (filterEntry(tarEntry)) {
                    continue;
                }

                try {
                    File tmpFile = saveArchiveEntry(tarEntry, in, _working);
                    converter.convert(tmpFile);
                } catch (IOException err) {
                    log.error("Unable to save item, FILE=" + tarFile.getName() + "!" + tarEntry.getName(), err);
                }
            }
        } catch (ArchiveException ae) {
            throw new IOException(ae);
        } finally {
            // The archive stream may never have been created; in that case the
            // raw file stream still has to be released.
            if (in != null) {
                in.close();
            } else {
                input.close();
            }
        }
        return workingDir;
    }

    /**
     * Save an entry beneath the given root dir.
     *
     * @param E         archive entry being read
     * @param archiveio archive stream positioned at the entry's data
     * @param root      folder the entry is saved under
     * @return the saved (or already existing) file
     * @throws IOException if entry could not be saved to disk, e.g., outputDir
     */
    private File saveArchiveEntry(ArchiveEntry E, InputStream archiveio, File root) throws IOException {
        return saveArchiveEntry(E, archiveio, root.getAbsolutePath());
    }

    /**
     * Save an entry beneath the given root path. Parent folders are created as
     * needed. If the target already exists and {@link #overwrite} is false,
     * nothing is written and the existing file is returned.
     *
     * @param E         archive entry being read
     * @param archiveio archive stream positioned at the entry's data
     * @param root      path of the folder the entry is saved under
     * @return the saved (or already existing) file
     * @throws IOException if the entry name is invalid, resolves outside of root,
     *                     or the data could not be written
     */
    private File saveArchiveEntry(ArchiveEntry E, InputStream archiveio, String root) throws IOException {

        // Note: using native OS file path is fine here. As long as you do not
        // try any string mechanics on paths.
        String targetPath = FilenameUtils.concat(root, E.getName());
        if (targetPath == null) {
            throw new IOException("Invalid archive entry target for " + E.getName());
        }
        File target = new File(targetPath);

        // concat() resolves "../" segments and passes absolute entry names
        // through, so the entry name alone must not decide where we write.
        ensureInsideRoot(new File(root), target, E.getName());

        if (target.exists() && !overwrite) {
            return target;
        }

        target.getParentFile().mkdirs();
        log.debug("ARCHIVE_ENTRY={}", E.getName());

        OutputStream output = new FileOutputStream(target);
        try {
            IOUtils.copy(archiveio, output);
        } finally {
            output.close();
        }
        return target;
    }

    /**
     * Reject a target whose canonical path is not located beneath the canonical
     * path of the root folder.
     *
     * @param rootDir   folder entries are allowed to be written under
     * @param target    resolved destination for the entry
     * @param entryName entry name, used for the error message only
     * @throws IOException if target is not inside rootDir
     */
    private static void ensureInsideRoot(File rootDir, File target, String entryName) throws IOException {
        String rootPath = rootDir.getCanonicalPath();
        String prefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;
        if (!target.getCanonicalPath().startsWith(prefix)) {
            throw new IOException("Archive entry resolves outside of output folder, ENTRY=" + entryName);
        }
    }

    /**
     * @param E archive entry
     * @return true if the entry is to be skipped: it is a directory or the
     *         exclusion filter rejects its name
     */
    private boolean filterEntry(ArchiveEntry E) {
        return E.isDirectory() || filter.filterOutFile(E.getName());
    }

    /**
     * @return always null; no collector name is defined for this class yet
     */
    @Override
    public String getName() {
        // TODO: left unimplemented in the original; callers receive null.
        return null;
    }
}