List of usage examples for org.apache.hadoop.fs.FileSystem#delete
public abstract boolean delete(Path f, boolean recursive) throws IOException;
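A minimal, self-contained sketch of the call itself (the path /tmp/example/output and the default Configuration are assumptions for illustration, not taken from the examples below). With recursive set to true, a non-empty directory is removed along with its contents; the method returns true only if the delete succeeded.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical target path, used only for illustration.
        Path target = new Path("/tmp/example/output");

        // recursive = true deletes a directory and everything under it;
        // the return value reports whether the delete succeeded.
        boolean deleted = fs.delete(target, true);
        System.out.println("Deleted " + target + ": " + deleted);
    }
}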
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override
public void deleteDirectory(long companyId, long repositoryId, String dirName)
    throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullDirPath(companyId, repositoryId, dirName);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        fileSystem.delete(fullPath, true);

        Path parentPath = fullPath.getParent();

        deleteEmptyAncestors(parentPath);
    }
    catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override
public void deleteFile(long companyId, long repositoryId, String fileName, String versionLabel)
    throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(fullPath)) {
            fileSystem.delete(fullPath, true);
        }

        Path parentPath = fullPath.getParent();

        deleteEmptyAncestors(companyId, repositoryId, parentPath);
    }
    catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
protected void deleteEmptyAncestors(long companyId, long repositoryId, Path path) throws SystemException {
    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        FileStatus[] listStatus = fileSystem.listStatus(path);

        if ((listStatus == null) || (listStatus.length > 0)) {
            return;
        }

        Path parentPath = path.getParent();

        if (fileSystem.delete(path, true) && fileSystem.exists(parentPath)) {
            deleteEmptyAncestors(companyId, repositoryId, parentPath);
        }
    }
    catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.linkedin.cubert.examples.Purge.java
License:Open Source License
private void swap(String original, String temp) throws IOException {
    Path source = new Path(temp);
    Path dest = new Path(original);
    FileSystem fs = dest.getFileSystem(conf);

    fs.delete(dest, true);
    fs.rename(source, dest);
}
From source file:com.linkedin.cubert.plan.physical.CompletionTasks.java
License:Open Source License
public static void doCompletionTasks(JsonNode tasks) throws IOException {
    FileSystem fs = FileSystem.get(new JobConf());

    for (int i = 0; i < tasks.size(); i++) {
        try {
            final JsonNode task = tasks.get(i);
            final String taskType = JsonUtils.getText(task, "type");
            final String[] paths = JsonUtils.asArray(task, "paths");

            if (taskType.equals("rm")) {
                for (String path : paths) {
                    System.out.println("Deleting path " + path + "...");
                    fs.delete(new Path(path), true);
                }
            } else if (taskType.equals("mv")) {
                System.out.println("Moving " + paths[0] + " to " + paths[1] + "...");
                final Path from = new Path(paths[0]);
                final Path to = new Path(paths[1]);
                fs.delete(to, true);
                fs.rename(from, to);
            }
        } catch (IOException e) {
            System.err.println("ERROR: " + e.getMessage());
        }
    }
}
From source file:com.linkedin.cubert.plan.physical.ExecutorService.java
License:Open Source License
private void onCompletion() throws IOException {
    if (json.has("onCompletion") && !json.get("onCompletion").isNull()) {
        JsonNode tasks = json.get("onCompletion");
        FileSystem fs = FileSystem.get(new JobConf());

        for (int i = 0; i < tasks.size(); i++) {
            try {
                final JsonNode task = tasks.get(i);
                final String taskType = JsonUtils.getText(task, "type");
                final String[] paths = JsonUtils.asArray(task, "paths");

                if (taskType.equals("rm")) {
                    for (String path : paths) {
                        System.out.println("Deleting path " + path + "...");
                        fs.delete(new Path(path), true);
                    }
                } else if (taskType.equals("mv")) {
                    System.out.println("Moving " + paths[0] + " to " + paths[1] + "...");
                    final Path from = new Path(paths[0]);
                    final Path to = new Path(paths[1]);
                    fs.delete(to, true);
                    fs.rename(from, to);
                }
            } catch (IOException e) {
                System.err.println("ERROR: " + e.getMessage());
            }
        }
    }
}
From source file:com.linkedin.cubert.plan.physical.GenerateDictionary.java
License:Open Source License
public static void mergeDictionaries(Configuration conf, Path dir) throws IOException {
    Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
    FileSystem fs = FileSystem.get(conf);

    Path currentDictPath = new Path(dir, "dictionary");
    Schema schema = getSchema();

    // Read the existing dictionaries
    if (fs.exists(currentDictPath)) {
        dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf));

        // move the current dictionary to new file
        Path oldPath = new Path(dir, "_dictionary.old");
        fs.delete(oldPath, true);
        fs.rename(currentDictPath, oldPath);
    }

    // Read the new entries
    Path globPath = new Path(dir, "tmp/part-*");
    FileStatus[] allStatus = fs.globStatus(globPath);

    for (FileStatus status : allStatus) {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
        DataFileReader<GenericRecord> dataFileReader =
                new DataFileReader<GenericRecord>(new FsInput(status.getPath(), conf), datumReader);

        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next();

            String colName = record.get("colname").toString();
            String colValue = record.get("colvalue").toString();

            CodeDictionary dict = dictionaries.get(colName);
            if (dict == null) {
                dict = new CodeDictionary();
                dictionaries.put(colName, dict);
            }

            dict.addKey(colValue);
        }
    }

    // Write the dictionaries back
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    FSDataOutputStream out = fs.create(currentDictPath);
    dataFileWriter.create(schema, out);

    Record record = new Record(schema);
    for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) {
        String colName = entry.getKey();
        CodeDictionary dict = entry.getValue();

        for (String colValue : dict.keySet()) {
            int code = dict.getCodeForKey(colValue);
            record.put("colname", colName);
            record.put("colvalue", colValue);
            record.put("code", code);

            dataFileWriter.append(record);
        }
    }

    dataFileWriter.close();
}
From source file:com.linkedin.cubert.utils.CodeDictionary.java
License:Open Source License
public void write(FileSystem fs, Path path) throws IOException {
    // if the path exists, rename the existing file with ".old" suffix
    if (fs.exists(path)) {
        Path renamePath = new Path(path.toString() + ".old");
        fs.delete(renamePath, false);
        fs.rename(path, renamePath);
    }

    // Write data to file
    FSDataOutputStream ostream = fs.create(path);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(ostream));

    for (Map.Entry<String, Integer> entry : keyToCodeMap.entrySet()) {
        String line = String.format("%s %d\n", entry.getKey(), entry.getValue());
        writer.write(line);
    }

    writer.flush();
    writer.close();
    ostream.close();
}
From source file:com.linkedin.hadoop.example.WordCountCounters.java
License:Apache License
/**
 * Azkaban will look for a method named `run` to start your job. Use this method to set up all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 */
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}
From source file:com.linkedin.mapred.AbstractAvroJob.java
License:Open Source License
/**
 * Sets up various standard settings in the JobConf. You probably don't want to mess with this.
 *
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
protected JobConf createJobConf() throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    conf.setJobName(getJobId());
    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroOutputFormat.setDeflateLevel(conf, 9);

    String hadoop_ugi = _config.getString("hadoop.job.ugi", null);
    if (hadoop_ugi != null) {
        conf.set("hadoop.job.ugi", hadoop_ugi);
    }
    if (_config.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        _log.info("Running locally, no hadoop jar set.");
    }

    // set JVM options if present
    if (_config.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", _config.getString("mapred.child.java.opts"));
        _log.info("mapred.child.java.opts set to " + _config.getString("mapred.child.java.opts"));
    }

    if (_config.containsKey(INPUT_PATHS)) {
        List<String> inputPathnames = _config.getStringList(INPUT_PATHS);
        for (String pathname : inputPathnames) {
            AvroUtils.addAllSubPaths(conf, new Path(pathname));
        }
        AvroJob.setInputSchema(conf, AvroUtils.getAvroInputSchema(conf));
    }

    if (_config.containsKey(OUTPUT_PATH)) {
        Path path = new Path(_config.get(OUTPUT_PATH));
        AvroOutputFormat.setOutputPath(conf, path);

        if (_config.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // set all hadoop configs
    for (String key : _config.keySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, _config.get(key));
        }
    }

    return conf;
}
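Several of the examples above (Purge.swap, the Cubert completion tasks, GenerateDictionary, and CodeDictionary.write) use the same replace-in-place idiom: delete the destination, then rename the freshly written file or directory over it. A condensed sketch of that pattern, with hypothetical paths and no claim about the original classes' internals; note that delete-then-rename is not atomic, so a failure between the two calls can leave the destination missing.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplaceInPlace {

    // Replaces dest with src: recursively delete dest if present, then rename src over it.
    static void replace(FileSystem fs, Path src, Path dest) throws IOException {
        if (fs.exists(dest)) {
            fs.delete(dest, true);
        }
        if (!fs.rename(src, dest)) {
            throw new IOException("Failed to rename " + src + " to " + dest);
        }
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // Hypothetical paths, used only for illustration.
        replace(fs, new Path("/tmp/data.tmp"), new Path("/tmp/data"));
    }
}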