Usage examples for org.apache.hadoop.fs.FileSystem.exists(Path)
public boolean exists(Path f) throws IOException
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
/**
 * Moves the default version of a stored file from one repository to another
 * by renaming its path on the Hadoop file system.
 *
 * @param companyId       company that owns the file
 * @param repositoryId    repository currently holding the file
 * @param newRepositoryId repository the file is moved to
 * @param fileName        name of the file (unchanged by the move)
 * @throws PortalException if the target path already exists (raised as a
 *         {@code DuplicateFileException}) or the source path does not exist
 * @throws SystemException if the rename reports failure or the file system
 *         raises an {@link IOException}
 */
@Override
public void updateFile(long companyId, long repositoryId, long newRepositoryId, String fileName)
        throws PortalException, SystemException {

    final Path from = HadoopManager.getFullVersionFilePath(
            companyId, repositoryId, fileName, VERSION_DEFAULT);
    final Path to = HadoopManager.getFullVersionFilePath(
            companyId, newRepositoryId, fileName, VERSION_DEFAULT);

    try {
        final FileSystem hdfs = HadoopManager.getFileSystem();

        // Refuse to overwrite something that is already stored at the destination.
        if (hdfs.exists(to)) {
            throw new DuplicateFileException(fileName);
        }

        if (!hdfs.exists(from)) {
            throw new PortalException("File " + from.toUri().toString() + " does not exist");
        }

        if (hdfs.rename(from, to)) {
            return;
        }

        // rename() signals failure through its return value rather than an exception.
        throw new SystemException("File name directory was not renamed from "
                + from.toUri().toString() + " to " + to.toUri().toString());
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
/**
 * Renames the default version of a stored file within a single repository.
 *
 * @param companyId    company that owns the file
 * @param repositoryId repository holding the file
 * @param fileName     current name of the file
 * @param newFileName  name the file should have afterwards
 * @throws PortalException if a file named {@code newFileName} already exists
 *         (raised as a {@code DuplicateFileException}) or the source file
 *         does not exist
 * @throws SystemException if the rename reports failure or the file system
 *         raises an {@link IOException}
 */
public void updateFile(long companyId, long repositoryId, String fileName, String newFileName)
        throws PortalException, SystemException {

    Path sourcePath = HadoopManager.getFullVersionFilePath(
            companyId, repositoryId, fileName, VERSION_DEFAULT);
    Path targetPath = HadoopManager.getFullVersionFilePath(
            companyId, repositoryId, newFileName, VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(targetPath)) {
            // The conflicting entry is the *new* name; reporting the source
            // name here would point the caller at the wrong file.
            throw new DuplicateFileException(newFileName);
        }

        if (!fileSystem.exists(sourcePath)) {
            throw new PortalException("File " + sourcePath.toUri().toString() + " does not exist");
        }

        boolean renamed = fileSystem.rename(sourcePath, targetPath);

        if (!renamed) {
            throw new SystemException("File name directory was not renamed from "
                    + sourcePath.toUri().toString() + " to " + targetPath.toUri().toString());
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
protected void deleteEmptyAncestors(long companyId, long repositoryId, Path path) throws SystemException { try {//www . j a va 2s. com FileSystem fileSystem = HadoopManager.getFileSystem(); FileStatus[] listStatus = fileSystem.listStatus(path); if ((listStatus == null) || (listStatus.length > 0)) { return; } Path parentPath = path.getParent(); if (fileSystem.delete(path, true) && fileSystem.exists(parentPath)) { deleteEmptyAncestors(companyId, repositoryId, parentPath); } } catch (IOException ioe) { throw new SystemException(ioe); } }
From source file:com.liferay.hadoop.util.HadoopManager.java
License:Open Source License
/**
 * Submits (or re-submits) the "Word Count" job for the root path carried by
 * the given store event.
 *
 * <p>On the first call the job jar is uploaded from the servlet context to
 * {@code _jobPath} if it is not there yet, any existing results directory is
 * renamed out of the way, and the job is configured and submitted. On every
 * call, if the tracked job is neither running nor in preparation, the results
 * directory is renamed again and the job is re-submitted.</p>
 *
 * <p>Does nothing when no servlet context has been set. Failures inside the
 * submission logic are printed to standard error and not rethrown.</p>
 *
 * @param storeEvent event whose root path selects the input and output paths
 * @throws IOException if obtaining the file system or job client fails
 */
public static void runJob(StoreEvent storeEvent) throws IOException {
    FileSystem fileSystem = getFileSystem();

    if (_servletContext == null) {
        return;
    }

    JobClient jobClient = getJobClient();

    Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*"));
    Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results"));

    try {
        if (_runningJob == null) {
            if (!fileSystem.exists(_jobPath)) {
                InputStream inputStream = null;
                FSDataOutputStream outputStream = null;

                try {
                    // Open the source first: if the jar is missing we must not
                    // leave an empty file at _jobPath, because later calls
                    // would then skip the upload.
                    inputStream = _servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    if (inputStream == null) {
                        throw new IOException(
                                "Unable to find /WEB-INF/lib/hadoop-job.jar in the servlet context");
                    }

                    outputStream = fileSystem.create(_jobPath);

                    // 'false' leaves both streams open; they are closed below.
                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(inputStream, outputStream);
                }
            }

            // Keep previous results by moving them to a timestamped sibling.
            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = new JobConf(_sharedJobConf);

            _jobConf.setJobName("Word Count");
            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            _runningJob = jobClient.submitJob(_jobConf);
        }

        int jobState = _runningJob.getJobState();

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {
            System.out.println("Re-issuing the word count job.");

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _runningJob = jobClient.submitJob(_jobConf);
        }
    } catch (Exception e) {
        // Best effort: a failed submission is reported but does not propagate.
        e.printStackTrace();
    }
}
From source file:com.lightboxtechnologies.spectrum.ExtractMapper.java
License:Apache License
protected Map<String, Object> process_extent_large(FSDataInputStream file, FileSystem fs, Path outPath, Map<String, ?> map, Context context) throws IOException { context.getCounter(FileTypes.BIG).increment(1); final Map<String, Object> rec = new HashMap<String, Object>(); OutputStream fout = null;//from www . j av a 2 s. c o m try { fout = fs.create(outPath, true); hashAndExtract(rec, fout, file, map, context); } finally { IOUtils.closeQuietly(fout); } final String hash = new String(Hex.encodeHex((byte[]) rec.get("md5"))); final Path subDir = new Path("/texaspete/ev", hashFolder(hash)), hashPath = new Path(subDir, hash); fs.mkdirs(subDir); if (fs.exists(hashPath)) { context.getCounter(FileTypes.BIG_DUPES).increment(1); } else if (!fs.rename(outPath, hashPath)) { LOG.warn("Could not rename " + outPath + " to " + hashPath); context.getCounter(FileTypes.PROBLEMS).increment(1); } final StreamProxy content = new FileProxy(hashPath.toString()); rec.put("Content", content); return rec; }
From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java
License:Apache License
public static int runPipeline(String src, String dst) throws IOException { final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); final Path rpath = new Path(src); final Path zpath = new Path(dst); if (!fs.exists(rpath)) { throw new IOException("Source path does not exist."); }// w w w.j av a2 s . co m OutputStream out = null; try { out = zpath.getFileSystem(conf).create(zpath); zip(fs, rpath, out); out.close(); } finally { IOUtils.closeQuietly(out); } return 0; }
From source file:com.linkedin.cubert.analyzer.physical.ShuffleRewriter.java
License:Open Source License
private JsonNode rewriteDictionary(JsonNode job) { ObjectNode newJob = (ObjectNode) cloneNode(job); ObjectNode shuffle = (ObjectNode) newJob.get("shuffle"); if (shuffle == null) throw new RuntimeException("Shuffle description missing. Cannot rewrite."); newJob.put("reducers", 1); // Determine if this is a refresh job or a fresh dictionary creation by looking at // STORE location String storePath = job.get("output").get("path").getTextValue(); String dictionaryPath = storePath + "/part-r-00000.avro"; boolean refresh = false; try {//ww w. j ava 2s.com FileSystem fs = FileSystem.get(new JobConf()); refresh = fs.exists(new Path(dictionaryPath)); } catch (IOException e) { // we will not refresh } // Rewrite map JsonNode relationName = shuffle.get("name"); ObjectNode mapSideOperator = JsonUtils.createObjectNode("operator", "USER_DEFINED_TUPLE_OPERATOR", "class", "com.linkedin.cubert.operator.DictionaryRefreshMapSideOperator", "input", JsonUtils.createArrayNode(relationName), "output", relationName, "columns", shuffle.get("columns")); copyLine(shuffle, mapSideOperator, "[MAP] "); for (JsonNode map : newJob.path("map")) { if (!map.has("operators") || map.get("operators").isNull()) ((ObjectNode) map).put("operators", JsonUtils.createArrayNode()); ArrayNode operators = (ArrayNode) map.get("operators"); operators.add(mapSideOperator); } // Rewrite shuffle shuffle.put("name", relationName); shuffle.put("type", "SHUFFLE"); shuffle.put("partitionKeys", JsonUtils.createArrayNode(CommonUtils.array("colname", "colvalue"))); shuffle.put("pivotKeys", JsonUtils.createArrayNode(CommonUtils.array("colname", "colvalue"))); if (shuffle.has("columns")) shuffle.remove("columns"); if (shuffle.has("dictionaryPath")) shuffle.remove("dictionaryPath"); shuffle.remove("input"); // Rewrite reduce if (!newJob.has("reduce") || newJob.get("reduce").isNull()) newJob.put("reduce", JsonUtils.createArrayNode()); ArrayNode reduceJob = (ArrayNode) newJob.get("reduce"); ObjectNode reduceSideOperator = 
(ObjectNode) cloneNode(mapSideOperator); reduceSideOperator.put("class", "com.linkedin.cubert.operator.DictionaryRefreshReduceSideOperator"); copyLine(shuffle, reduceSideOperator, "[REDUCE] "); reduceJob.insert(0, reduceSideOperator); // Rewrite cached files if (refresh) { String newStorePath = storePath + "/tmp"; String newDictionaryPath = newStorePath + "/part-r-00000.avro"; // put the existing dictionary file in dist cache if (!newJob.has("cachedFiles") || newJob.get("cachedFiles").isNull()) newJob.put("cachedFiles", JsonUtils.createArrayNode()); ArrayNode cachedFiles = (ArrayNode) newJob.get("cachedFiles"); cachedFiles.add(dictionaryPath + "#dictionary"); // tell the operators to use cached dictionary mapSideOperator.put("dictionary", dictionaryPath + "#dictionary"); reduceSideOperator.put("dictionary", dictionaryPath + "#dictionary"); // the output path is changed to <original path>/tmp ((ObjectNode) newJob.get("output")).put("path", newStorePath); // put onCompletion for this job to move the new dictionary to the parent // folder ArrayNode onCompletion = mapper.createArrayNode(); // onCompletion.add(JsonUtils.createObjectNode("type", // "rm", // "paths", // JsonUtils.createArrayNode(storePath // + "/dictionary.avro"))); onCompletion.add(JsonUtils.createObjectNode("type", "mv", "paths", JsonUtils.createArrayNode(new String[] { newDictionaryPath, dictionaryPath }))); onCompletion.add( JsonUtils.createObjectNode("type", "rm", "paths", JsonUtils.createArrayNode(newStorePath))); newJob.put("onCompletion", onCompletion); } return newJob; }
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java
License:Apache License
/**
 * Resolves the avro schema for an input path.
 *
 * <ul>
 *   <li>A path that does not exist, or is rejected by
 *       {@code AvroStorageUtils.PATH_FILTER}, yields {@code null}.</li>
 *   <li>A non-directory path, or a directory for which
 *       {@code AvroStorageUtils.noDir} holds, is resolved via
 *       {@code getSchema}.</li>
 *   <li>Otherwise every listed child is resolved recursively. When
 *       {@code checkSchema} is off, the result for the first child is
 *       returned as is; when it is on, all non-null child schemas must be
 *       equal.</li>
 * </ul>
 *
 * @param path input path
 * @param fs   file system
 * @return the schema, or {@code null} if none could be determined
 * @throws IOException if children carry different schemas, or on I/O failure
 */
@SuppressWarnings("deprecation")
protected Schema getAvroSchema(Path path, FileSystem fs) throws IOException {
    boolean usable = fs.exists(path) && AvroStorageUtils.PATH_FILTER.accept(path);
    if (!usable) {
        return null;
    }

    if (!fs.isDirectory(path)) {
        return getSchema(path, fs);
    }

    FileStatus[] children = fs.listStatus(path, AvroStorageUtils.PATH_FILTER);

    if (children.length == 0) {
        System.err.println("Cannot get avro schema! Input path " + path + " might be empty.");
        return null;
    }

    if (AvroStorageUtils.noDir(children)) {
        return getSchema(path, fs);
    }

    // Mixed or nested content: collect the schema from the sub-paths.
    Schema agreed = null;

    for (FileStatus child : children) {
        Schema candidate = getAvroSchema(child.getPath(), fs);

        if (agreed != null) {
            if (candidate != null && !agreed.equals(candidate)) {
                throw new IOException("Input path is " + path + ". Sub-direcotry " + child.getPath()
                        + " contains different schema " + candidate + " than " + agreed);
            }
            continue;
        }

        agreed = candidate;

        if (!checkSchema) {
            System.out.println("Do not check schema; use schema of " + child.getPath());
            return agreed;
        }
    }

    if (agreed == null) {
        System.err.println("Cannot get avro schema! Input path " + path + " might be empty.");
    }

    return agreed;
}
From source file:com.linkedin.cubert.plan.physical.GenerateDictionary.java
License:Open Source License
public static void mergeDictionaries(Configuration conf, Path dir) throws IOException { Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>(); FileSystem fs = FileSystem.get(conf); Path currentDictPath = new Path(dir, "dictionary"); Schema schema = getSchema();//from w w w . jav a2 s . co m // Read the existing dictionaries if (fs.exists(currentDictPath)) { dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf)); // move the current dictionary to new file Path oldPath = new Path(dir, "_dictionary.old"); fs.delete(oldPath, true); fs.rename(currentDictPath, oldPath); } // Read the new entries Path globPath = new Path(dir, "tmp/part-*"); FileStatus[] allStatus = fs.globStatus(globPath); for (FileStatus status : allStatus) { DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>( new FsInput(status.getPath(), conf), datumReader); GenericRecord record = null; while (dataFileReader.hasNext()) { record = dataFileReader.next(); String colName = record.get("colname").toString(); String colValue = record.get("colvalue").toString(); CodeDictionary dict = dictionaries.get(colName); if (dict == null) { dict = new CodeDictionary(); dictionaries.put(colName, dict); } dict.addKey(colValue); } } // Write the dictionaries back DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter); FSDataOutputStream out = fs.create(currentDictPath); dataFileWriter.create(schema, out); Record record = new Record(schema); for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) { String colName = entry.getKey(); CodeDictionary dict = entry.getValue(); for (String colValue : dict.keySet()) { int code = dict.getCodeForKey(colValue); record.put("colname", colName); record.put("colvalue", colValue); 
record.put("code", code); dataFileWriter.append(record); } } dataFileWriter.close(); }
From source file:com.linkedin.cubert.plan.physical.JobExecutor.java
License:Open Source License
/**
 * Adds every entry of the job's {@code libjars} list to the class path via
 * the distributed cache.
 *
 * <p>An entry that exists on the local file system is first copied into
 * {@code tmpDir} on the job's file system and the copy is registered; any
 * other entry is registered as given. Does nothing if the job description has
 * no {@code libjars} field.</p>
 *
 * @throws IOException if a local-existence check, copy or registration fails
 */
protected void setLibjars() throws IOException {
    if (root.has("libjars")) {
        FileSystem local = FileSystem.getLocal(conf);

        for (JsonNode jarNode : asArray(root, "libjars")) {
            Path jar = new Path(jarNode.getTextValue());

            if (local.exists(jar)) {
                // Local jar: stage it where the cluster can read it.
                Path staged = new Path(tmpDir, jar.getName());
                fs.copyFromLocalFile(jar, staged);
                jar = staged;
            }

            DistributedCache.addFileToClassPath(jar, conf, fs);
        }
    }
}