Java tutorial
/******************************************************************************* * Copyright (c) 2011 Tran Nam Quang. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Tran Nam Quang - initial API and implementation *******************************************************************************/ package net.sourceforge.vaticanfetcher.model.index.file; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.List; import net.sourceforge.vaticanfetcher.enums.Msg; import net.sourceforge.vaticanfetcher.model.FileResource; import net.sourceforge.vaticanfetcher.model.HotColdFileCache; import net.sourceforge.vaticanfetcher.model.HotColdFileCache.PermanentFileResource; import net.sourceforge.vaticanfetcher.model.Path; import net.sourceforge.vaticanfetcher.model.Path.PathParts; import net.sourceforge.vaticanfetcher.model.TreeNode; import net.sourceforge.vaticanfetcher.model.UtilModel; import net.sourceforge.vaticanfetcher.model.index.DiskSpaceException; import net.sourceforge.vaticanfetcher.model.index.IndexingConfig; import net.sourceforge.vaticanfetcher.model.index.IndexingException; import net.sourceforge.vaticanfetcher.model.index.file.FileFolder.FileFolderVisitor; import net.sourceforge.vaticanfetcher.model.parse.ParseException; import net.sourceforge.vaticanfetcher.util.Util; import net.sourceforge.vaticanfetcher.util.annotations.NotNull; import net.sourceforge.vaticanfetcher.util.annotations.ThreadSafe; import SevenZip.Archive.IInArchive; import SevenZip.Archive.SevenZipEntry; import SevenZip.Archive.SevenZip.Handler; import com.google.common.base.Predicate; import com.google.common.io.Closeables; import com.google.common.io.NullOutputStream; import de.innosystec.unrar.Archive; import de.innosystec.unrar.exception.RarException; import de.innosystec.unrar.rarfile.FileHeader; import de.schlichtherle.truezip.file.TFile; import de.schlichtherle.truezip.file.TVFS; import de.schlichtherle.truezip.fs.FsSyncException; public final class FileFactory { private final HotColdFileCache unpackCache; public FileFactory(@NotNull HotColdFileCache unpackCache) { this.unpackCache = Util.checkNotNull(unpackCache); } // thrown parse exception has localized error message @NotNull @ThreadSafe public FileResource createFile(@NotNull IndexingConfig config, @NotNull Path path) throws ParseException, FileNotFoundException { Util.checkNotNull(config, path); try { PathParts pathParts = path.splitAtExistingFile(); File leftFile = pathParts.getLeft().getCanonicalFile(); // Input path refers to an ordinary file; // this is the most common case and must therefore return reasonably fast if (pathParts.getRight().isEmpty()) return new PermanentFileResource(leftFile); // Input path seems to refer to an archive entry; let's check the cache first final FileResource[] cachedResource = new FileResource[1]; PathParts cacheSplitParts = path.splitFromRight(new Predicate<Path>() { public boolean apply(Path path) { cachedResource[0] = unpackCache.get(path); return cachedResource[0] != null; } }); // Nothing found in cache; attempt to unpack the archive entry if (cacheSplitParts == null) { FileResource archiveResource = new PermanentFileResource(leftFile); Path archivePath = pathParts.getLeft(); String entryPath = pathParts.getRight(); return unpackFromArchive(config, archivePath, archiveResource, entryPath); } // Found unpacked ordinary file in cache; just return it if (cacheSplitParts.getRight().isEmpty()) return cachedResource[0]; // Found intermediate archive in cache; need to unpack the remainder Path archivePath = cacheSplitParts.getLeft(); String entryPath = cacheSplitParts.getRight(); return unpackFromArchive(config, archivePath, cachedResource[0], entryPath); } catch (FileNotFoundException e) { // Discard original exception to provide a more helpful message String msg = Msg.file_not_found.get() + " " + path.getPath(); throw new FileNotFoundException(msg); } catch (ArchiveEncryptedException e) { String msg = Msg.archive_encrypted.get(); throw new ParseException(msg, e); } catch (DiskSpaceException e) { throw new ParseException(e); // TODO i18n: add localized error message } catch (IOException e) { throw new ParseException(e); } } @NotNull private FileResource unpackFromArchive(@NotNull IndexingConfig config, @NotNull Path originalArchivePath, @NotNull FileResource archiveResource, @NotNull String entryPath) throws ArchiveEncryptedException, DiskSpaceException, FileNotFoundException, IOException { File archiveFile = archiveResource.getFile(); assert !(archiveFile instanceof TFile); assert archiveFile.isFile(); TFile tzFile = new TFile(archiveFile, config.createZipDetector()); if (tzFile.isDirectory()) return unpackFromZipArchive(config, originalArchivePath, archiveResource, tzFile, entryPath); return unpackFromSolidArchive(config, originalArchivePath, archiveResource, entryPath); } @NotNull private FileResource unpackFromSolidArchive(@NotNull IndexingConfig config, @NotNull Path originalArchivePath, @NotNull FileResource archiveResource, @NotNull String entryPath) throws ArchiveEncryptedException, DiskSpaceException, FileNotFoundException, IOException { File archiveFile = archiveResource.getFile(); String archiveExt = Util.splitFilename(archiveFile)[1]; if (!Util.hasExtension(entryPath, config.getHtmlExtensions())) { // Without HTML pairing if (archiveExt.equals("exe") || archiveExt.equals("7z")) return unpackFrom7zArchive(config, originalArchivePath, archiveResource, entryPath); else if (archiveExt.equals("rar")) // JUnRar doesn't support SFX rar archives return unpackFromRarArchive(config, originalArchivePath, archiveResource, entryPath); else throw new FileNotFoundException(); } else { // With HTML pairing /* Note: We'll ignore the HTML pairing flag in the config object and always leave the HTML pairing on. */ SolidArchiveTree<?> solidArchive; if (archiveExt.equals("exe") || archiveExt.equals("7z")) solidArchive = new SevenZipTree(archiveFile, config, true, originalArchivePath, null); else if (archiveExt.equals("rar")) // JUnRar doesn't support SFX rar archives solidArchive = new RarTree(archiveFile, config, true, originalArchivePath, null); else throw new FileNotFoundException(); return unpackFromSolidArchive(config, archiveResource, solidArchive, entryPath); } } @NotNull private FileResource unpackFromZipArchive(@NotNull final IndexingConfig config, @NotNull final Path originalArchivePath, @NotNull final FileResource archiveResource, @NotNull final TFile archiveFile, @NotNull final String entryPath) throws ArchiveEncryptedException, DiskSpaceException, FileNotFoundException, IOException { assert archiveFile.isArchive(); assert archiveFile.getEnclArchive() == null; final FileResource[] result = new FileResource[1]; final Exception[] exception = new Exception[1]; /* Note: We'll ignore the HTML pairing flag in the config object and always leave the HTML pairing on. */ new HtmlFileWalker(archiveFile, config) { protected void handleFile(File file) { String currentPath = getRelativePath(archiveFile, file); try { if (currentPath.equals(entryPath)) { // Exact match on regular file File unpackedFile = maybeUnpackZipEntry(config, file); Path cacheKey = originalArchivePath.createSubPath(currentPath); result[0] = unpackCache.putIfAbsent(cacheKey, unpackedFile); stop(); } else if (entryPath.startsWith(currentPath + "/") // Partial match on solid archive && config.isSolidArchive(file.getName())) { File innerArchiveFile; try { innerArchiveFile = maybeUnpackZipEntry(config, file); TVFS.umount(archiveFile); } finally { archiveResource.dispose(); } Path cacheKey = originalArchivePath.createSubPath(currentPath); FileResource innerArchive = unpackCache.putIfAbsent(cacheKey, innerArchiveFile); String remainingPath = entryPath.substring(currentPath.length() + 1); result[0] = unpackFromSolidArchive(config, cacheKey, innerArchive, remainingPath); stop(); } } catch (Exception e) { exception[0] = e; stop(); } } protected void handleHtmlPair(File htmlFile, File htmlDir) { String currentPath = getRelativePath(archiveFile, htmlFile); if (!currentPath.equals(entryPath)) return; Path cacheKey = originalArchivePath.createSubPath(currentPath); try { if (htmlDir == null) { File unpackedFile = maybeUnpackZipEntry(config, htmlFile); result[0] = unpackCache.putIfAbsent(cacheKey, unpackedFile); } else { /* * Here, we could check whether there's enough disk space for unpacking the files, and throw an exception if not. * However, this requires recursing into the zipped HTML folder and is also very unlikely to happen, so we'll leave it out. */ TFile tzHtmlFile = (TFile) htmlFile; TFile tzHtmlDir = (TFile) htmlDir; File tempDir = Util.createTempDir(); File newHtmlFile = new File(tempDir, tzHtmlFile.getName()); File newHtmlDir = new File(tempDir, tzHtmlDir.getName()); tzHtmlFile.cp(newHtmlFile); tzHtmlDir.cp_r(newHtmlDir); result[0] = unpackCache.putIfAbsent(cacheKey, newHtmlFile, tempDir); } } catch (Exception e) { exception[0] = e; } stop(); } protected void handleDir(File dir) { // Nothing to do } protected boolean skip(File fileOrDir) { if (!fileOrDir.isDirectory()) return false; // Skip all directories that aren't parent directories of the target file String currentPath = getRelativePath(archiveFile, fileOrDir); return !entryPath.startsWith(currentPath + "/"); } protected void runFinally() { try { if (archiveFile.exists()) // Might have been deleted earlier TVFS.umount(archiveFile); } catch (FsSyncException e) { exception[0] = e; } finally { archiveResource.dispose(); } } }.runSilently(); maybeThrow(exception, DiskSpaceException.class); maybeThrow(exception, IOException.class); maybeThrow(exception, FileNotFoundException.class); maybeThrow(exception, ArchiveEncryptedException.class); /* If we reach this point with a non-null exception array element, we might have forgotten to rethrow exceptions of a particular type. */ assert exception[0] == null : exception[0].toString(); if (result[0] == null) throw new FileNotFoundException(); return result[0]; } private static <T extends Throwable> void maybeThrow(@NotNull Exception[] array, @NotNull Class<T> clazz) throws T { assert array.length == 1; if (clazz.isInstance(array[0])) throw clazz.cast(array[0]); } @NotNull private static String getRelativePath(@NotNull File src, @NotNull File dst) { String path = UtilModel.getRelativePath(src, dst); assert noTrailingSlash(path); return path; } @NotNull private static File maybeUnpackZipEntry(@NotNull IndexingConfig config, @NotNull File packedFile) throws DiskSpaceException, IOException { try { File unpackedFile = UtilModel.maybeUnpackZipEntry(config, packedFile); assert unpackedFile != null; return unpackedFile; } catch (IndexingException e) { throw e.getIOException(); } } // does not support HTML pairing, but is faster and more lightweight @NotNull private FileResource unpackFrom7zArchive(@NotNull final IndexingConfig config, @NotNull Path originalArchivePath, @NotNull FileResource archiveResource, @NotNull String entryPath) throws ArchiveEncryptedException, DiskSpaceException, FileNotFoundException, IOException { // TODO now: windows: can we check if the archive or the target archive entry is encrypted? assert noTrailingSlash(entryPath); IInArchive archive = new Handler(); SevenZipInputStream istream = new SevenZipInputStream(archiveResource.getFile()); if (archive.Open(istream) != 0) throw new IOException(); try { for (int i = 0; i < archive.size(); i++) { SevenZipEntry entry = archive.getEntry(i); if (entry.isDirectory()) continue; final String currentPath = entry.getName(); assert !currentPath.contains("\\"); assert noTrailingSlash(currentPath); // TODO post-release-1.1: throw disk space exception if (entryPath.equals(currentPath)) { // Exact match File unpackedFile = unpack7zEntry(config, archive, currentPath, i); Path cacheKey = originalArchivePath.createSubPath(currentPath); return unpackCache.putIfAbsent(cacheKey, unpackedFile); } else if (entryPath.startsWith(currentPath + "/") && config.isArchive(currentPath)) { // Partial match File innerArchiveFile; try { innerArchiveFile = unpack7zEntry(config, archive, currentPath, i); } finally { archiveResource.dispose(); } Path cacheKey = originalArchivePath.createSubPath(currentPath); String remainingPath = entryPath.substring(currentPath.length() + 1); FileResource innerArchive = unpackCache.putIfAbsent(cacheKey, innerArchiveFile); return unpackFromArchive(config, cacheKey, innerArchive, remainingPath); } } } finally { archive.close(); archiveResource.dispose(); } throw new FileNotFoundException(); } private static File unpack7zEntry(@NotNull final IndexingConfig config, @NotNull IInArchive archive, @NotNull final String entryPath, int index) throws IOException { return new SevenZipUnpacker<File>(archive) { private File unpackedFile; public File getOutputFile(int index) throws IOException { String entryName = getLastPathPart(entryPath); try { return unpackedFile = config.createDerivedTempFile(entryName); } catch (IndexingException e) { throw e.getIOException(); } } public File getUnpackResult() { return unpackedFile; } }.unpack(index); } // does not support HTML pairing, but is faster and more lightweight @NotNull private FileResource unpackFromRarArchive(@NotNull IndexingConfig config, @NotNull Path originalArchivePath, @NotNull FileResource archiveResource, @NotNull String entryPath) throws ArchiveEncryptedException, DiskSpaceException, FileNotFoundException, IOException { Archive archive = null; try { File archiveFile = archiveResource.getFile(); archive = new Archive(archiveFile); if (archive.isEncrypted()) throw new ArchiveEncryptedException(archiveFile, originalArchivePath.getPath()); List<FileHeader> fileHeaders = archive.getFileHeaders(); boolean isSolid = false; for (FileHeader fh : fileHeaders) { if (fh.isSolid()) { isSolid = true; break; } } /* * For solid archives, if we want to extract a certain archive entry, we also have to extract all archive entries that preceded * it. Thus, it is more efficient to run through the archive twice rather than once: During the first phase, we check for any * matching archive entries by only looking at the file headers, and return early if there is no match. Only if there is a match, * we'll proceed to the second phase, where, if the archive uses solid compression, all files up to the target file will be extracted. */ if (isSolid) { boolean match = false; for (FileHeader fh : fileHeaders) { if (fh.isEncrypted() || fh.isDirectory()) continue; String currentPath = fh.isUnicode() ? fh.getFileNameW() : fh.getFileNameString(); currentPath = Util.toForwardSlashes(currentPath); assert noTrailingSlash(currentPath); if (entryPath.equals(currentPath) || (entryPath.startsWith(currentPath + "/") && config.isArchive(currentPath))) { match = true; break; } } if (!match) throw new FileNotFoundException(); } FileHeader fh = null; NullOutputStream nullOut = isSolid ? new NullOutputStream() : null; while (true) { fh = archive.nextFileHeader(); if (fh == null) break; // Last entry reached if (fh.isEncrypted() || fh.isDirectory()) continue; String currentPath = fh.isUnicode() ? fh.getFileNameW() : fh.getFileNameString(); currentPath = Util.toForwardSlashes(currentPath); assert noTrailingSlash(currentPath); // TODO post-release-1.1: throw disk space exception if (entryPath.equals(currentPath)) { // Exact match Path cacheKey = originalArchivePath.createSubPath(currentPath); File unpackedFile = unpackRarEntry(config, archive, fh, entryPath); return unpackCache.putIfAbsent(cacheKey, unpackedFile); } else if (entryPath.startsWith(currentPath + "/") && config.isArchive(currentPath)) { // Partial match File innerArchiveFile; try { innerArchiveFile = unpackRarEntry(config, archive, fh, currentPath); } finally { archiveResource.dispose(); } Path cacheKey = originalArchivePath.createSubPath(currentPath); FileResource innerArchive = unpackCache.putIfAbsent(cacheKey, innerArchiveFile); String remainingPath = entryPath.substring(currentPath.length() + 1); return unpackFromArchive(config, cacheKey, innerArchive, remainingPath); } else if (isSolid) { // Not a match archive.extractFile(fh, nullOut); } } } catch (RarException e) { throw new IOException(e); } finally { Closeables.closeQuietly(archive); archiveResource.dispose(); } throw new FileNotFoundException(); } @NotNull private static File unpackRarEntry(@NotNull IndexingConfig config, @NotNull Archive archive, @NotNull FileHeader fh, @NotNull String entryPath) throws IOException, RarException { String entryName = getLastPathPart(entryPath); OutputStream out = null; try { File unpackedFile = config.createDerivedTempFile(entryName); out = new FileOutputStream(unpackedFile); archive.extractFile(fh, out); return unpackedFile; } catch (IndexingException e) { throw e.getIOException(); } finally { Closeables.closeQuietly(out); } } @NotNull private static String getLastPathPart(@NotNull String string) { for (int i = string.length() - 1; i >= 0; i--) { char c = string.charAt(i); if (c == '/' || c == '\\') { return string.substring(i + 1); } } return string; } // paths should not contain backslashes // entryPath is relative to archive root // supports HTML pairing, but has some overhead @NotNull private FileResource unpackFromSolidArchive(@NotNull IndexingConfig config, @NotNull FileResource archiveResource, @NotNull final SolidArchiveTree<?> archive, @NotNull final String entryPath) throws ArchiveEncryptedException, DiskSpaceException, FileNotFoundException, IOException { final TreeNode[] matchingNode = new TreeNode[1]; final String[] remainingPath = new String[1]; try { new FileFolderVisitor<Exception>(archive.getArchiveFolder()) { protected void visitFolder(FileFolder parent, FileFolder folder) { if (!folder.isArchive()) return; String currentPath = archive.getArchiveEntryPath(folder); assert noTrailingSlash(currentPath); if (!entryPath.startsWith(currentPath + "/")) return; // Partial match required matchingNode[0] = folder; remainingPath[0] = entryPath.substring(currentPath.length() + 1); stop(); } protected void visitDocument(FileFolder parent, FileDocument fileDocument) { String currentPath = archive.getArchiveEntryPath(fileDocument); assert noTrailingSlash(currentPath); if (!currentPath.equals(entryPath)) return; // Exact match required matchingNode[0] = fileDocument; stop(); } }.runSilently(); TreeNode treeNode = matchingNode[0]; if (treeNode == null) throw new FileNotFoundException(); Path cacheKey = treeNode.getPath(); if (matchingNode[0] instanceof FileFolder) { // Inner archive try { archive.unpack(treeNode); } finally { archiveResource.dispose(); } File innerArchiveFile = archive.getFile(treeNode); if (innerArchiveFile == null) throw new IOException(); // Unpacking failed for some reason FileResource innerArchive = unpackCache.putIfAbsent(cacheKey, innerArchiveFile); return unpackFromArchive(config, cacheKey, innerArchive, remainingPath[0]); } else if (matchingNode[0] instanceof FileDocument) { // File FileDocument htmlDoc = (FileDocument) treeNode; FileFolder htmlFolder = htmlDoc.getHtmlFolder(); if (htmlFolder == null) { // Ordinary file or HTML file without HTML folder archive.unpack(treeNode); File htmlFile = archive.getFile(treeNode); return unpackCache.putIfAbsent(cacheKey, htmlFile); } else { // HTML file with HTML folder List<FileDocument> docsDeep = htmlFolder.getDocumentsDeep(); docsDeep.add(htmlDoc); File tempDir = Util.createTempDir(); archive.unpack(docsDeep, tempDir); File htmlFile = archive.getFile(htmlDoc); return unpackCache.putIfAbsent(cacheKey, htmlFile, tempDir); } } else { throw new IllegalStateException(); } } finally { Closeables.closeQuietly(archive); archiveResource.dispose(); } } private static boolean noTrailingSlash(@NotNull String path) { int length = path.length(); if (length == 0) return true; char lastChar = path.charAt(length - 1); return lastChar != '/' && lastChar != '\\'; } }