List of usage examples for org.apache.hadoop.fs.FileSystem.getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
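getLocal returns the LocalFileSystem bound to the given Configuration, i.e. a FileSystem implementation backed by the local disk rather than HDFS. Before the per-project examples below, here is a minimal self-contained sketch of the call; the class name and the /tmp listing target are illustrative assumptions, not taken from any of the sources that follow.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
  public static void main(String[] args) throws IOException {
    // Obtain the local file system for this configuration
    LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
    // List a local directory through the Hadoop FileSystem API
    // ("/tmp" is an illustrative path)
    for (FileStatus status : localFs.listStatus(new Path("/tmp"))) {
      System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
    }
  }
}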
From source file:com.cloudera.sqoop.TestMerge.java
License:Apache License
/**
 * Return true if there's a file in 'dirName' with a line that starts with 'prefix'.
 */
protected boolean recordStartsWith(String prefix, String dirName) throws Exception {
  Path warehousePath = new Path(LOCAL_WAREHOUSE_DIR);
  Path targetPath = new Path(warehousePath, dirName);
  FileSystem fs = FileSystem.getLocal(new Configuration());
  FileStatus[] files = fs.listStatus(targetPath);
  if (null == files || files.length == 0) {
    fail("Got no import files!");
  }
  for (FileStatus stat : files) {
    Path p = stat.getPath();
    if (p.getName().startsWith("part-")) {
      if (checkFileForLine(fs, p, prefix)) {
        // We found the line. Nothing further to do.
        return true;
      }
    }
  }
  return false;
}
From source file:com.cloudera.training.metrics.JobHistoryHelper.java
License:Apache License
public static JobHistory.JobInfo getJobInfoFromLocalFile(String outputFile, Configuration conf)
    throws IOException {
  FileSystem fs = FileSystem.getLocal(conf);
  Path outputFilePath = new Path(outputFile);
  String[] jobDetails =
      JobHistory.JobInfo.decodeJobHistoryFileName(outputFilePath.getName()).split("_");
  String jobId = jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
  JobHistory.JobInfo job = new JobHistory.JobInfo(jobId);
  DefaultJobHistoryParser.parseJobTasks(outputFile, job, fs);
  return job;
}
From source file:com.dasasian.chok.mapfile.MapFileServer.java
License:Apache License
public MapFileServer() throws IOException {
  fileSystem = FileSystem.getLocal(conf);
}
From source file:com.dasasian.chok.util.FileUtilTest.java
License:Apache License
@Test
public void testUnzipPathFileFileSystemBoolean() throws IOException {
  Configuration configuration = new Configuration();
  FileSystem fileSystem = FileSystem.getLocal(configuration);

  // Test the unspooled case
  File targetFolder = temporaryFolder.newFolder("unpacked2");
  Path zipPath = new Path(testZipFile.getAbsolutePath());
  FileUtil.unzip(zipPath, targetFolder, fileSystem, false);
  File segment = new File(targetFolder, INDEX_TXT);
  assertTrue("Unzipped local zip directly to target", segment.exists());

  // Test the spooled case
  targetFolder = temporaryFolder.newFolder("unpacked3");
  zipPath = new Path(testZipFile.getAbsolutePath());
  FileUtil.unzip(zipPath, targetFolder, fileSystem, true);
  segment = new File(targetFolder, INDEX_TXT);
  assertTrue("Unzipped spooled local zip to target", segment.exists());
}
From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java
License:Apache License
public static void createTables(String database, TajoClient client) throws Exception {
  String dataDir = getDataDir();
  if (dataDir == null || dataDir.isEmpty()) {
    throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
  }
  if (dataDir.startsWith("hdfs://")) {
    Path path = new Path(dataDir);
    FileSystem fs = path.getFileSystem(new Configuration());
    for (String eachTable : tableNames) {
      Path tableDataDir = new Path(path, eachTable);
      if (!fs.exists(tableDataDir)) {
        throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
      }
    }
  } else {
    File dataDirFile = new File(dataDir);
    if (!dataDirFile.exists()) {
      throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
    }
    if (dataDirFile.isFile()) {
      throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
    }
    for (String eachTable : tableNames) {
      File tableDataDir = new File(dataDirFile, eachTable);
      if (!tableDataDir.exists()) {
        throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
      }
    }
  }

  KeyValueSet opt = new KeyValueSet();
  opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

  LOG.info("Create database: " + database);
  client.executeQuery("create database if not exists " + database);

  Path tpcdsResourceURL = new Path(ClassLoader.getSystemResource("tpcds").toString());
  Path ddlPath = new Path(tpcdsResourceURL, "ddl");
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  FileStatus[] files = localFs.listStatus(ddlPath);

  String dataDirWithPrefix = dataDir;
  if (dataDir.indexOf("://") < 0) {
    dataDirWithPrefix = "file://" + dataDir;
  }

  for (FileStatus eachFile : files) {
    if (eachFile.isFile()) {
      String tableName = eachFile.getPath().getName().split("\\.")[0];
      String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
      query = query.replace("${DB}", database);
      query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);
      LOG.info("Create table:" + tableName + "," + query);
      client.executeQuery(query);
    }
  }
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
private Path findSolrConfig(Configuration conf) throws IOException {
  Path solrHome = null;
  // we added these lines to make this patch work on Hadoop 0.20.2
  FileSystem localFs = FileSystem.getLocal(conf);
  if (FileSystem.get(conf).equals(localFs)) {
    return new Path(localSolrHome);
  }
  // end-of-addition
  Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
  if (localArchives.length == 0) {
    throw new IOException(String.format("No local cache archives, where is %s", zipName));
  }
  for (Path unpackedDir : localArchives) {
    // Only logged if debugging
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Examining unpack directory %s for %s", unpackedDir, zipName));
      ProcessBuilder lsCmd = new ProcessBuilder(
          new String[] { "/bin/ls", "-lR", unpackedDir.toString() });
      lsCmd.redirectErrorStream(true); // merge stderr into stdout so the listing is captured
      Process ls = lsCmd.start();
      try {
        byte[] buf = new byte[16 * 1024];
        InputStream all = ls.getInputStream();
        int count;
        while ((count = all.read(buf)) > 0) {
          System.err.write(buf, 0, count);
        }
      } catch (IOException ignore) {
      }
      System.err.format("Exit value is %d%n", ls.exitValue());
    }
    if (unpackedDir.getName().equals(zipName)) {
      solrHome = unpackedDir;
      break;
    }
  }
  return solrHome;
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
/**
 * Write a file to a zip output stream, removing leading path name components from the
 * actual file name when creating the zip file entry.
 *
 * The entry placed in the zip file is <code>baseName</code>/<code>relativePath</code>, where
 * <code>relativePath</code> is constructed by removing a leading <code>root</code> from the
 * path for <code>itemToZip</code>.
 *
 * If <code>itemToZip</code> is an empty directory, it is ignored. If <code>itemToZip</code>
 * is a directory, the contents of the directory are added recursively.
 *
 * @param conf The Hadoop configuration
 * @param zos The zip output stream
 * @param baseName The base name to use for the file name entry in the zip file
 * @param root The path to remove from <code>itemToZip</code> to make a relative path name
 * @param itemToZip The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
public static int zipDirectory(final Configuration conf, final ZipOutputStream zos,
    final String baseName, final String root, final Path itemToZip) throws IOException {
  LOG.info(String.format("zipDirectory: %s %s %s", baseName, root, itemToZip));
  LocalFileSystem localFs = FileSystem.getLocal(conf);
  int count = 0;

  final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
  if (itemStatus.isDir()) {
    final FileStatus[] statai = localFs.listStatus(itemToZip);

    // Add a directory entry to the zip file
    final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
    final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
    LOG.info(String.format("Adding directory %s to zip", zipDirName));
    zos.putNextEntry(dirZipEntry);
    zos.closeEntry();
    count++;

    if (statai == null || statai.length == 0) {
      LOG.info(String.format("Skipping empty directory %s", itemToZip));
      return count;
    }
    for (FileStatus status : statai) {
      count += zipDirectory(conf, zos, baseName, root, status.getPath());
    }
    LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
    return count;
  }

  final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
  if (inZipPath.length() == 0) {
    LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
    return 0;
  }

  // Take empty files in case the place holder is needed
  FSDataInputStream in = null;
  try {
    in = localFs.open(itemToZip);
    final ZipEntry ze = new ZipEntry(inZipPath);
    ze.setTime(itemStatus.getModificationTime());
    // Comments confuse looking at the zip file
    // ze.setComment(itemToZip.toString());
    zos.putNextEntry(ze);
    IOUtils.copyBytes(in, zos, conf, false);
    zos.closeEntry();
    LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
    return 1;
  } finally {
    if (in != null) { // guard against an NPE when open() itself fails
      in.close();
    }
  }
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestRollup.java
License:Apache License
/**
 * Checks that {@link RollupReducer} properly calls {@link TupleReducer#onOpenGroup},
 * {@link TupleReducer#onCloseGroup} and {@link TupleReducer#onGroupElements}, and checks that
 * the elements (tuples) passed are coherent. This method assumes a specific output from the
 * {@link TupleReducer}: the output needs to be Text,Text for key and value, in this format:
 *
 *   key("OPEN depth"),  value("serialized value")
 *   key("CLOSE depth"), value("serialized value")
 *   key("ELEMENT"),     value("serialized element")
 *
 * (one such ELEMENT record is expected for every element received in onGroupElements)
 *
 * For instance:
 *   key("OPEN 0"),  value("element1")
 *   key("OPEN 1"),  value("element1")
 *   key("ELEMENT"), value("element1")
 *   key("ELEMENT"), value("element2")
 *   key("CLOSE 1"), value("element2")
 *   key("CLOSE 0"), value("element2")
 */
public void checkRollupOutput(Path path, int minDepth, int maxDepth) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.getLocal(getConf()), path, getConf());

  Text actualKey = new Text();
  Text actualValue = new Text();
  reader.next(actualKey, actualValue); // first action
  String currentKey = actualKey.toString();
  String currentValue = actualValue.toString();

  Assert.assertTrue("First output needs to be an OPEN", currentKey.startsWith("OPEN"));
  int currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
  Assert.assertEquals("First OPEN needs to match minDepth", minDepth, currentDepth);

  int lastDepth = currentDepth;
  String lastValue = currentValue;
  State lastState = State.OPEN;

  while (reader.next(actualKey, actualValue)) {
    currentKey = actualKey.toString();
    currentValue = actualValue.toString();
    if (currentKey.startsWith("OPEN")) {
      currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
      Assert.assertEquals("OPEN needs to increase depth by +1", lastDepth + 1, currentDepth);
      Assert.assertTrue("Too many OPENs, over maxDepth", maxDepth >= currentDepth);
      if (lastState == State.OPEN) {
        Assert.assertEquals("First element in OPEN needs to match first element in previous OPEN",
            lastValue, currentValue);
      } else if (lastState == State.CLOSE) {
        Assert.assertNotSame(
            "Element from new group needs to be different from last element from last group",
            lastValue, currentValue);
      } else {
        Assert.fail("Not allowed OPEN after ELEMENT");
      }
      lastState = State.OPEN;
      lastValue = currentValue;
      lastDepth = currentDepth;
    } else if (currentKey.startsWith("CLOSE")) {
      currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
      Assert.assertNotSame("Not allowed CLOSE after OPEN, needs at least one ELEMENT in between",
          State.OPEN, lastState);
      Assert.assertEquals("CLOSE depth needs to match previous OPEN depth", lastDepth, currentDepth);
      Assert.assertEquals("Element in CLOSE needs to match last element in group", lastValue,
          currentValue);
      lastState = State.CLOSE;
      lastValue = currentValue;
      lastDepth = currentDepth - 1;
    } else if (currentKey.startsWith("ELEMENT")) {
      Assert.assertNotSame("Not allowed ELEMENT after CLOSE, needs an OPEN or ELEMENT before",
          State.CLOSE, lastState);
      lastState = State.ELEMENT;
      lastValue = currentValue;
    }
  }

  Assert.assertEquals("File doesn't properly finish with a CLOSE", State.CLOSE, lastState);
  Assert.assertEquals("Last CLOSE doesn't close the minDepth", minDepth - 1, lastDepth);
  reader.close();
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property
 * so that you can call {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)}
 * to re-instantiate the serialized instance.
 *
 * @param obj The object instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized.
 *        It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
    throws FileNotFoundException, IOException, URISyntaxException {
  File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
  if (!hadoopTmpDir.exists()) {
    hadoopTmpDir.mkdir();
  }
  File file = new File(hadoopTmpDir, serializeToLocalFile);
  FileSystem fS = FileSystem.get(conf);

  ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
  out.writeObject(obj);
  out.close();

  // If the default file system is already the local one, there is nothing to copy
  if (fS.equals(FileSystem.getLocal(conf))) {
    return;
  }

  String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
  if (tmpHdfsFolder == null) {
    // Set the temporary folder for Pangool instances to the temporary folder of the user
    // that is running the Job. This folder will be used across the cluster for locating
    // the instances. This way, tasktrackers that are run as a different user will still
    // be able to locate this folder.
    tmpHdfsFolder = conf.get("hadoop.tmp.dir");
    conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
  }
  Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
  if (fS.exists(toHdfs)) {
    // Remove any stale copy already present in HDFS before copying
    fS.delete(toHdfs, false);
  }
  FileUtil.copy(FileSystem.getLocal(conf), new Path(file.toString()), FileSystem.get(conf),
      toHdfs, true, conf);
  DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Given a file post-fix, locate a file in the DistributedCache. It iterates over all the
 * local files and returns the first one that meets this condition.
 *
 * @param conf The Hadoop Configuration.
 * @param filePostFix The file post-fix.
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
  FileSystem fS = FileSystem.get(conf);
  Path locatedFile = null;

  if (fS.equals(FileSystem.getLocal(conf))) {
    // We use the Java File API locally because the Hadoop Path, FileSystem, etc. are too
    // slow for tests that need to call this method a lot
    File tmpFolder = new File(conf.get("hadoop.tmp.dir"));
    for (File file : tmpFolder.listFiles()) {
      if (file.getName().endsWith(filePostFix)) {
        locatedFile = new Path(file.toString());
        break;
      }
    }
  } else {
    Path tmpHdfsFolder = new Path(conf.get(HDFS_TMP_FOLDER_CONF, conf.get("hadoop.tmp.dir")));
    for (FileStatus fSt : fS.listStatus(tmpHdfsFolder)) {
      Path path = fSt.getPath();
      if (path.toString().endsWith(filePostFix)) {
        locatedFile = path;
        break;
      }
    }
  }
  return locatedFile;
}