List of usage examples for org.apache.hadoop.fs.FileSystem.rename
public abstract boolean rename(Path src, Path dst) throws IOException;
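rename moves a file or directory within one FileSystem instance. Note the boolean return: most failure modes (missing source, pre-existing destination, missing destination parent, implementation-specific restrictions) are reported by returning false rather than by throwing IOException, which is why nearly every example below checks the result or guards with exists() first. A minimal sketch of the basic call pattern; the paths and the default Configuration are illustrative assumptions, not taken from the examples on this page:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        // A default Configuration resolves to the local filesystem unless
        // fs.defaultFS points at a cluster; the paths here are hypothetical.
        FileSystem fs = FileSystem.get(new Configuration());

        Path src = new Path("/tmp/staging/data.avro");
        Path dst = new Path("/tmp/final/data.avro");

        // Most implementations require the destination's parent to exist.
        fs.mkdirs(dst.getParent());

        // rename() reports failure by returning false, not by throwing.
        if (!fs.rename(src, dst)) {
            throw new IOException("Could not rename " + src + " to " + dst);
        }
    }
}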
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override
public void updateFile(long companyId, long repositoryId, long newRepositoryId, String fileName)
    throws PortalException, SystemException {

    Path sourcePath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);
    Path targetPath = HadoopManager.getFullVersionFilePath(companyId, newRepositoryId, fileName, VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(targetPath)) {
            throw new DuplicateFileException(fileName);
        }

        if (!fileSystem.exists(sourcePath)) {
            throw new PortalException("File " + sourcePath.toUri().toString() + " does not exist");
        }

        boolean renamed = fileSystem.rename(sourcePath, targetPath);

        if (!renamed) {
            throw new SystemException("File name directory was not renamed from "
                + sourcePath.toUri().toString() + " to " + targetPath.toUri().toString());
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
public void updateFile(long companyId, long repositoryId, String fileName, String newFileName)
    throws PortalException, SystemException {

    Path sourcePath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);
    Path targetPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, newFileName, VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(targetPath)) {
            throw new DuplicateFileException(fileName);
        }

        if (!fileSystem.exists(sourcePath)) {
            throw new PortalException("File " + sourcePath.toUri().toString() + " does not exist");
        }

        boolean renamed = fileSystem.rename(sourcePath, targetPath);

        if (!renamed) {
            throw new SystemException("File name directory was not renamed from "
                + sourcePath.toUri().toString() + " to " + targetPath.toUri().toString());
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.liferay.hadoop.util.HadoopManager.java
License:Open Source License
public static void runJob(StoreEvent storeEvent) throws IOException {
    FileSystem fileSystem = getFileSystem();

    if (_servletContext == null) {
        return;
    }

    JobClient jobClient = getJobClient();

    Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*"));
    Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results"));

    try {
        if (_runningJob == null) {
            if (!fileSystem.exists(_jobPath)) {
                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    InputStream inputStream = _servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }
            }

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                    outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = new JobConf(_sharedJobConf);

            _jobConf.setJobName("Word Count");
            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            _runningJob = jobClient.submitJob(_jobConf);
        }

        int jobState = _runningJob.getJobState();

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {
            System.out.println("Re-issuing the word count job.");

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                    outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _runningJob = jobClient.submitJob(_jobConf);
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}
From source file:com.lightboxtechnologies.spectrum.ExtractMapper.java
License:Apache License
protected Map<String, Object> process_extent_large(FSDataInputStream file, FileSystem fs, Path outPath,
        Map<String, ?> map, Context context) throws IOException {
    context.getCounter(FileTypes.BIG).increment(1);

    final Map<String, Object> rec = new HashMap<String, Object>();

    OutputStream fout = null;
    try {
        fout = fs.create(outPath, true);
        hashAndExtract(rec, fout, file, map, context);
    } finally {
        IOUtils.closeQuietly(fout);
    }

    final String hash = new String(Hex.encodeHex((byte[]) rec.get("md5")));
    final Path subDir = new Path("/texaspete/ev", hashFolder(hash)), hashPath = new Path(subDir, hash);
    fs.mkdirs(subDir);

    if (fs.exists(hashPath)) {
        context.getCounter(FileTypes.BIG_DUPES).increment(1);
    } else if (!fs.rename(outPath, hashPath)) {
        LOG.warn("Could not rename " + outPath + " to " + hashPath);
        context.getCounter(FileTypes.PROBLEMS).increment(1);
    }

    final StreamProxy content = new FileProxy(hashPath.toString());
    rec.put("Content", content);

    return rec;
}
From source file:com.linkedin.cubert.examples.Purge.java
License:Open Source License
private void swap(String original, String temp) throws IOException {
    Path source = new Path(temp);
    Path dest = new Path(original);

    FileSystem fs = dest.getFileSystem(conf);
    fs.delete(dest, true);
    fs.rename(source, dest);
}
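Note that this swap is not atomic: between delete and rename the destination is briefly missing, and a crash in that window loses it entirely. One possible alternative on HDFS, a sketch assuming the newer FileContext API (Hadoop 0.21+) is available, is an overwriting rename, which replaces an existing destination file in a single step (for directories, the destination must be empty):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

public final class AtomicSwap {
    // Sketch: replace `original` with `temp` without the delete-then-rename
    // window in swap() above. Class and method names are hypothetical.
    public static void swap(String original, String temp, Configuration conf) throws IOException {
        Path source = new Path(temp);
        Path dest = new Path(original);
        FileContext fc = FileContext.getFileContext(conf);
        // Options.Rename.OVERWRITE replaces an existing destination file atomically on HDFS.
        fc.rename(source, dest, Options.Rename.OVERWRITE);
    }
}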
From source file:com.linkedin.cubert.plan.physical.CompletionTasks.java
License:Open Source License
public static void doCompletionTasks(JsonNode tasks) throws IOException {
    FileSystem fs = FileSystem.get(new JobConf());

    for (int i = 0; i < tasks.size(); i++) {
        try {
            final JsonNode task = tasks.get(i);
            final String taskType = JsonUtils.getText(task, "type");
            final String[] paths = JsonUtils.asArray(task, "paths");

            if (taskType.equals("rm")) {
                for (String path : paths) {
                    System.out.println("Deleting path " + path + "...");
                    fs.delete(new Path(path), true);
                }
            } else if (taskType.equals("mv")) {
                System.out.println("Moving " + paths[0] + " to " + paths[1] + "...");
                final Path from = new Path(paths[0]);
                final Path to = new Path(paths[1]);
                fs.delete(to, true);
                fs.rename(from, to);
            }
        } catch (IOException e) {
            System.err.println("ERROR: " + e.getMessage());
        }
    }
}
From source file:com.linkedin.cubert.plan.physical.ExecutorService.java
License:Open Source License
private void onCompletion() throws IOException {
    if (json.has("onCompletion") && !json.get("onCompletion").isNull()) {
        JsonNode tasks = json.get("onCompletion");
        FileSystem fs = FileSystem.get(new JobConf());

        for (int i = 0; i < tasks.size(); i++) {
            try {
                final JsonNode task = tasks.get(i);
                final String taskType = JsonUtils.getText(task, "type");
                final String[] paths = JsonUtils.asArray(task, "paths");

                if (taskType.equals("rm")) {
                    for (String path : paths) {
                        System.out.println("Deleting path " + path + "...");
                        fs.delete(new Path(path), true);
                    }
                } else if (taskType.equals("mv")) {
                    System.out.println("Moving " + paths[0] + " to " + paths[1] + "...");
                    final Path from = new Path(paths[0]);
                    final Path to = new Path(paths[1]);
                    fs.delete(to, true);
                    fs.rename(from, to);
                }
            } catch (IOException e) {
                System.err.println("ERROR: " + e.getMessage());
            }
        }
    }
}
From source file:com.linkedin.cubert.plan.physical.GenerateDictionary.java
License:Open Source License
public static void mergeDictionaries(Configuration conf, Path dir) throws IOException {
    Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
    FileSystem fs = FileSystem.get(conf);
    Path currentDictPath = new Path(dir, "dictionary");
    Schema schema = getSchema();

    // Read the existing dictionaries
    if (fs.exists(currentDictPath)) {
        dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf));

        // move the current dictionary to new file
        Path oldPath = new Path(dir, "_dictionary.old");
        fs.delete(oldPath, true);
        fs.rename(currentDictPath, oldPath);
    }

    // Read the new entries
    Path globPath = new Path(dir, "tmp/part-*");
    FileStatus[] allStatus = fs.globStatus(globPath);
    for (FileStatus status : allStatus) {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
        DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<GenericRecord>(new FsInput(status.getPath(), conf), datumReader);

        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next();

            String colName = record.get("colname").toString();
            String colValue = record.get("colvalue").toString();

            CodeDictionary dict = dictionaries.get(colName);
            if (dict == null) {
                dict = new CodeDictionary();
                dictionaries.put(colName, dict);
            }

            dict.addKey(colValue);
        }
    }

    // Write the dictionaries back
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    FSDataOutputStream out = fs.create(currentDictPath);

    dataFileWriter.create(schema, out);

    Record record = new Record(schema);
    for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) {
        String colName = entry.getKey();
        CodeDictionary dict = entry.getValue();

        for (String colValue : dict.keySet()) {
            int code = dict.getCodeForKey(colValue);
            record.put("colname", colName);
            record.put("colvalue", colValue);
            record.put("code", code);

            dataFileWriter.append(record);
        }
    }

    dataFileWriter.close();
}
From source file:com.linkedin.cubert.utils.CodeDictionary.java
License:Open Source License
public void write(FileSystem fs, Path path) throws IOException {
    // if the path exists, rename the existing file with ".old" suffix
    if (fs.exists(path)) {
        Path renamePath = new Path(path.toString() + ".old");
        fs.delete(renamePath, false);
        fs.rename(path, renamePath);
    }

    // Write data to file
    FSDataOutputStream ostream = fs.create(path);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(ostream));

    for (Map.Entry<String, Integer> entry : keyToCodeMap.entrySet()) {
        String line = String.format("%s %d\n", entry.getKey(), entry.getValue());
        writer.write(line);
    }

    writer.flush();
    writer.close();
    ostream.close();
}
From source file:com.linkedin.mlease.regression.jobs.RegressionAdmmTrain.java
License:Open Source License
@Override
public void run() throws Exception {
    _logger.info("Now running Regression Train using ADMM...");
    JobConfig props = super.getJobConfig();
    String outBasePath = props.getString(OUTPUT_BASE_PATH);
    JobConf conf = super.createJobConf();

    // Various configs
    int nblocks = props.getInt(NUM_BLOCKS);
    int niter = props.getInt(NUM_ITERS, 10);
    // Aggressive decay of liblinear_epsilon
    boolean aggressiveLiblinearEpsilonDecay = props.getBoolean(AGGRESSIVE_LIBLINEAR_EPSILON_DECAY, false);
    // Getting the value of the regularizer L1/L2
    int reg = props.getInt(REGULARIZER);
    if ((reg != 1) && (reg != 2)) {
        throw new IOException("Only L1 and L2 regularization supported!");
    }
    int numClickReplicates = props.getInt(NUM_CLICK_REPLICATES, 1);
    boolean ignoreValue = props.getBoolean(BINARY_FEATURE, false);
    float initializeBoostRate = props.getFloat(INITIALIZE_BOOST_RATE, 0);
    float rhoAdaptCoefficient = props.getFloat(RHO_ADAPT_COEFFICIENT, 0);

    // handling lambda and rho
    // initialize z and u and compute z-u and write to hadoop
    Map<String, LinearModel> z = new HashMap<String, LinearModel>(); // lambda ->
    List<String> lambdastr = props.getStringList(LAMBDA, ",");
    List<String> rhostr = props.getStringList(RHO, null, ",");
    if (rhostr != null) {
        if (rhostr.size() != lambdastr.size())
            throw new IOException(
                "The number of rho's should be exactly the same as the number of lambda's. OR: don't claim rho!");
    }
    Map<Float, Float> lambdaRho = new HashMap<Float, Float>();
    for (int j = 0; j < lambdastr.size(); j++) {
        float lambda = Float.parseFloat(lambdastr.get(j));
        float rho;
        if (rhostr != null) {
            rho = Float.parseFloat(rhostr.get(j));
        } else {
            if (lambda <= 100) {
                rho = 1;
            } else {
                rho = 10;
            }
        }
        lambdaRho.put(lambda, rho);
        z.put(String.valueOf(lambda), new LinearModel());
    }

    // Get specific lambda treatment for some features
    String lambdaMapPath = props.getString(LAMBDA_MAP, "");
    Map<String, Float> lambdaMap = new HashMap<String, Float>();
    if (!lambdaMapPath.equals("")) {
        AvroHdfsFileReader reader = new AvroHdfsFileReader(conf);
        ReadLambdaMapConsumer consumer = new ReadLambdaMapConsumer();
        reader.build(lambdaMapPath, consumer);
        consumer.done();
        lambdaMap = consumer.get();
    }
    _logger.info("Lambda Map has size = " + String.valueOf(lambdaMap.size()));

    // Write lambda_rho mapping into file
    String rhoPath = outBasePath + "/lambda-rho/part-r-00000.avro";
    writeLambdaRho(conf, rhoPath, lambdaRho);

    // test-loglik computation
    boolean testLoglikPerIter = props.getBoolean(TEST_LOGLIK_PER_ITER, false);
    DataFileWriter<GenericRecord> testRecordWriter = null;
    // test if the test file exists
    String testPath = props.getString(TEST_PATH, "");
    testLoglikPerIter = Util.checkPath(testPath);
    if (testLoglikPerIter) {
        List<Path> testPathList = AvroUtils.enumerateFiles(conf, new Path(testPath));
        if (testPathList.size() > 0) {
            testPath = testPathList.get(0).toString();
            _logger.info("Sample test path = " + testPath);

            AvroHdfsFileWriter<GenericRecord> writer = new AvroHdfsFileWriter<GenericRecord>(conf,
                outBasePath + "/sample-test-loglik/write-test-00000.avro", SampleTestLoglik.SCHEMA$);
            testRecordWriter = writer.get();
        }
    }
    if (testRecordWriter == null) {
        testLoglikPerIter = false;
        _logger.info("test.loglik.per.iter=false or test path doesn't exist or is empty! "
            + "So we will not output test loglik per iteration.");
    } else {
        testRecordWriter.close();
    }

    MutableFloat bestTestLoglik = new MutableFloat(-9999999);

    // Initialize z by mean model
    if (initializeBoostRate > 0 && reg == 2) {
        _logger.info("Now start mean model initializing......");
        // Different paths for L1 vs L2 set from job file
        String initalModelPath;
        initalModelPath = outBasePath + "/initialModel";
        Path initalModelPathFromNaiveTrain = new Path(outBasePath, "models");

        JobConfig propsIni = JobConfig.clone(props);
        if (!propsIni.containsKey(LIBLINEAR_EPSILON)) {
            propsIni.put(LIBLINEAR_EPSILON, 0.01);
        }
        propsIni.put(RegressionNaiveTrain.HEAVY_PER_ITEM_TRAIN, "true");
        propsIni.put(LAMBDA_MAP, lambdaMapPath);
        propsIni.put(REMOVE_TMP_DIR, "false");

        // run job
        RegressionNaiveTrain initializationJob =
            new RegressionNaiveTrain(super.getJobId() + "_ADMMInitialization", propsIni);
        initializationJob.run();

        FileSystem fs = initalModelPathFromNaiveTrain.getFileSystem(conf);
        if (fs.exists(new Path(initalModelPath))) {
            fs.delete(new Path(initalModelPath), true);
        }
        fs.rename(initalModelPathFromNaiveTrain, new Path(initalModelPath));

        // set up lambda
        Set<Float> lambdaSet = new HashSet<Float>();
        for (String l : lambdastr) {
            lambdaSet.add(Float.parseFloat(l));
        }

        // Compute Mean model as initial model
        z = LinearModelUtils.meanModel(conf, initalModelPath, nblocks, lambdaSet.size(), true);

        if (testLoglikPerIter) {
            updateLogLikBestModel(conf, 0, z, testPath, ignoreValue, bestTestLoglik, outBasePath,
                numClickReplicates);
        }
    }

    double mindiff = 99999999;
    float liblinearEpsilon = 0.01f;
    int i;
    for (i = 1; i <= niter; i++) {
        _logger.info("Now starting iteration " + String.valueOf(i));

        // set up configuration
        props.put(AbstractAvroJob.OUTPUT_PATH, outBasePath + "/iter-" + String.valueOf(i));
        conf = createJobConf(AdmmMapper.class, AdmmReducer.class,
            Pair.getPairSchema(Schema.create(Type.INT), RegressionPrepareOutput.SCHEMA$),
            RegressionTrainOutput.SCHEMA$);
        conf.setPartitionerClass(AdmmPartitioner.class);
        // AvroUtils.setSpecificReducerInput(conf, true);
        conf.setInt(NUM_BLOCKS, nblocks);
        // Added for L1/L2
        conf.setInt(REGULARIZER, reg);
        conf.setLong(REPORT_FREQUENCY, props.getLong(REPORT_FREQUENCY, 1000000));
        // boolean ignoreValue = props.getBoolean(BINARY_FEATURE, false);
        conf.setBoolean(BINARY_FEATURE, ignoreValue);
        conf.setBoolean(SHORT_FEATURE_INDEX, props.getBoolean(SHORT_FEATURE_INDEX, false));

        boolean penalizeIntercept = props.getBoolean(PENALIZE_INTERCEPT, false);
        String interceptKey = props.getString(INTERCEPT_KEY, LibLinearDataset.INTERCEPT_NAME);
        conf.set(INTERCEPT_KEY, interceptKey);
        // int schemaType = props.getInt(SCHEMA_TYPE, 1);

        // compute and store u into file
        // u = uplusx - z
        String uPath = outBasePath + "/iter-" + String.valueOf(i) + "/u/part-r-00000.avro";
        if (i == 1) {
            LinearModelUtils.writeLinearModel(conf, uPath, new HashMap<String, LinearModel>());
            if (initializeBoostRate > 0 && reg == 2) {
                conf.setFloat(RHO_ADAPT_RATE, initializeBoostRate);
            }
        } else {
            String uplusxPath = outBasePath + "/iter-" + String.valueOf(i - 1) + "/model";
            computeU(conf, uPath, uplusxPath, z);
            if (rhoAdaptCoefficient > 0) {
                float curRhoAdaptRate = (float) Math.exp(-(i - 1) * rhoAdaptCoefficient);
                conf.setFloat(RHO_ADAPT_RATE, curRhoAdaptRate);
            }
        }

        // write z into file
        String zPath = outBasePath + "/iter-" + String.valueOf(i) + "/init-value/part-r-00000.avro";
        LinearModelUtils.writeLinearModel(conf, zPath, z);

        // run job
        String outpath = outBasePath + "/iter-" + String.valueOf(i) + "/model";
        conf.set(U_PATH, uPath);
        conf.set(INIT_VALUE_PATH, zPath);
        conf.set(LAMBDA_RHO_MAP, rhoPath);
        if (i > 1 && mindiff < 0.001 && !aggressiveLiblinearEpsilonDecay) {
            // need to get a more accurate estimate from liblinear
            liblinearEpsilon = liblinearEpsilon / 10;
        } else if (aggressiveLiblinearEpsilonDecay && i > 5) {
            liblinearEpsilon = liblinearEpsilon / 10;
        }
        conf.setFloat(LIBLINEAR_EPSILON, liblinearEpsilon);
        // Added for logging aggressive decay
        _logger.info("Liblinear Epsilon for iter = " + String.valueOf(i) + " is: "
            + String.valueOf(liblinearEpsilon));
        _logger.info("aggressiveLiblinearEpsilonDecay=" + aggressiveLiblinearEpsilonDecay);
        AvroOutputFormat.setOutputPath(conf, new Path(outpath));
        AvroUtils.addAvroCacheFiles(conf, new Path(uPath));
        AvroUtils.addAvroCacheFiles(conf, new Path(zPath));
        AvroUtils.addAvroCacheFiles(conf, new Path(rhoPath));
        conf.setNumReduceTasks(nblocks * lambdastr.size());
        AvroJob.setInputSchema(conf, RegressionPrepareOutput.SCHEMA$);
        AvroUtils.runAvroJob(conf);

        // Load the result from the last iteration
        // compute z and u given x
        Map<String, LinearModel> xbar = LinearModelUtils.meanModel(conf, outpath, nblocks, lambdaRho.size(), true);
        Map<String, LinearModel> ubar = LinearModelUtils.meanModel(conf, uPath, nblocks, lambdaRho.size(), false);
        Map<String, LinearModel> lastz = new HashMap<String, LinearModel>();
        for (String k : z.keySet()) {
            lastz.put(k, z.get(k).copy());
        }
        for (String lambda : xbar.keySet()) {
            LinearModel thisz = z.get(lambda);
            thisz.clear();
            float l = Float.parseFloat(lambda);
            float r = lambdaRho.get(l);
            double weight;
            // L2 regularization
            if (reg == 2) {
                _logger.info("Running code for regularizer = " + String.valueOf(reg));
                weight = nblocks * r / (l + nblocks * r);
                Map<String, Double> weightmap = new HashMap<String, Double>();
                for (String k : lambdaMap.keySet()) {
                    weightmap.put(k, nblocks * r / (lambdaMap.get(k) + nblocks * r + 0.0));
                }
                thisz.linearCombine(1.0, weight, xbar.get(lambda), weightmap);
                if (!ubar.isEmpty()) {
                    thisz.linearCombine(1.0, weight, ubar.get(lambda), weightmap);
                }
                if (!penalizeIntercept) {
                    if (ubar.isEmpty()) {
                        thisz.setIntercept(xbar.get(lambda).getIntercept());
                    } else {
                        thisz.setIntercept(xbar.get(lambda).getIntercept() + ubar.get(lambda).getIntercept());
                    }
                }
                z.put(lambda, thisz);
            } else {
                // L1 regularization
                _logger.info("Running code for regularizer = " + String.valueOf(reg));
                weight = l / (r * nblocks + 0.0);
                Map<String, Double> weightmap = new HashMap<String, Double>();
                for (String k : lambdaMap.keySet()) {
                    weightmap.put(k, lambdaMap.get(k) / (r * nblocks + 0.0));
                }
                // LinearModel thisz = new LinearModel();
                thisz.linearCombine(1.0, 1.0, xbar.get(lambda));
                if (!ubar.isEmpty()) {
                    thisz.linearCombine(1.0, 1.0, ubar.get(lambda));
                }
                // Iterative Thresholding
                Map<String, Double> thisCoefficients = thisz.getCoefficients();
                for (String k : thisCoefficients.keySet()) {
                    double val = thisCoefficients.get(k);
                    if (val > weight) {
                        thisCoefficients.put(k, val - weight);
                    } else if (val < -weight) {
                        thisCoefficients.put(k, val + weight);
                    }
                }
                thisz.setCoefficients(thisCoefficients);
                if (!penalizeIntercept) {
                    if (ubar.isEmpty()) {
                        thisz.setIntercept(xbar.get(lambda).getIntercept());
                    } else {
                        thisz.setIntercept(xbar.get(lambda).getIntercept() + ubar.get(lambda).getIntercept());
                    }
                }
                z.put(lambda, thisz);
            }
        }
        xbar.clear();
        ubar.clear();

        // Output max difference between last z and this z
        mindiff = 99999999;
        double maxdiff = 0;
        for (String k : z.keySet()) {
            LinearModel tmp = lastz.get(k);
            if (tmp == null)
                tmp = new LinearModel();
            tmp.linearCombine(1, -1, z.get(k));
            double diff = tmp.maxAbsValue();
            _logger.info("For lambda=" + k + ": Max Difference between last z and this z = "
                + String.valueOf(diff));
            tmp.clear();
            if (mindiff > diff)
                mindiff = diff;
            if (maxdiff < diff)
                maxdiff = diff;
        }

        double epsilon = props.getDouble(EPSILON, 0.0001);

        // remove tmp files?
        if (props.getBoolean(REMOVE_TMP_DIR, false) && i >= 2) {
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path(outBasePath + "/iter-" + String.valueOf(i - 1)), true);
        }

        // Output testloglik and update best model
        if (testLoglikPerIter) {
            updateLogLikBestModel(conf, i, z, testPath, ignoreValue, bestTestLoglik, outBasePath,
                numClickReplicates);
        }

        if (maxdiff < epsilon && liblinearEpsilon <= 0.00001) {
            break;
        }
    }

    // write z into file
    String zPath = outBasePath + "/final-model/part-r-00000.avro";
    LinearModelUtils.writeLinearModel(conf, zPath, z);

    // remove tmp files?
    if (props.getBoolean(REMOVE_TMP_DIR, false)) {
        FileSystem fs = FileSystem.get(conf);
        Path initalModelPath = new Path(outBasePath + "/initialModel");
        if (fs.exists(initalModelPath)) {
            fs.delete(initalModelPath, true);
        }
        for (int j = i - 2; j <= i; j++) {
            Path deletepath = new Path(outBasePath + "/iter-" + String.valueOf(j));
            if (fs.exists(deletepath)) {
                fs.delete(deletepath, true);
            }
        }
        fs.delete(new Path(outBasePath + "/tmp-data"), true);
    }
}