List of usage examples for org.apache.hadoop.fs.Path.makeQualified
@Deprecated
public Path makeQualified(FileSystem fs)
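This single-argument overload is deprecated in the Hadoop API; FileSystem.makeQualified(Path) and the two-argument Path.makeQualified(URI, Path) are the usual replacements. A minimal sketch of the equivalent non-deprecated calls (the configuration and path used here are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path relative = new Path("data/input");

        // Deprecated form, as used in the examples below:
        Path qualifiedOld = relative.makeQualified(fs);

        // Non-deprecated equivalents:
        Path qualifiedNew = fs.makeQualified(relative);
        Path qualifiedExplicit = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());

        System.out.println(qualifiedNew); // e.g. file:/home/user/data/input
    }
}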
From source file:voldemort.store.readonly.mr.azkaban.VoldemortSwapJob.java
License:Apache License
public void run() throws Exception {
    String dataDir = swapConf.getDataDir();
    String storeName = swapConf.getStoreName();
    int httpTimeoutMs = swapConf.getHttpTimeoutMs();
    long pushVersion = swapConf.getPushVersion();
    Cluster cluster = swapConf.getCluster();
    ExecutorService executor = Executors.newCachedThreadPool();

    // Read the hadoop configuration settings
    JobConf conf = new JobConf();
    Path dataPath = new Path(dataDir);
    dataDir = dataPath.makeQualified(FileSystem.get(conf)).toString();

    /*
     * Replace the default protocol and port with the one derived as above
     */
    String existingProtocol = "";
    String existingPort = "";
    String[] pathComponents = dataDir.split(":");
    if (pathComponents.length >= 3) {
        existingProtocol = pathComponents[0];
        existingPort = pathComponents[2].split("/")[0];
    }
    info("Existing protocol = " + existingProtocol + " and port = " + existingPort);
    if (hdfsFetcherProtocol.length() > 0 && hdfsFetcherPort.length() > 0) {
        dataDir = dataDir.replaceFirst(existingProtocol, this.hdfsFetcherProtocol);
        dataDir = dataDir.replaceFirst(existingPort, this.hdfsFetcherPort);
    }

    // Create admin client
    AdminClient client = new AdminClient(cluster,
            new AdminClientConfig().setMaxConnectionsPerNode(cluster.getNumberOfNodes())
                    .setAdminConnectionTimeoutSec(httpTimeoutMs / 1000)
                    .setMaxBackoffDelayMs(swapConf.getMaxBackoffDelayMs()));

    if (pushVersion == -1L) {
        // Need to retrieve max version
        ArrayList<String> stores = new ArrayList<String>();
        stores.add(storeName);
        Map<String, Long> pushVersions = client.getROMaxVersion(stores);
        if (pushVersions == null || !pushVersions.containsKey(storeName)) {
            throw new RuntimeException("Push version could not be determined for store " + storeName);
        }
        pushVersion = pushVersions.get(storeName);
        pushVersion++;
    }

    // do the swap
    info("Initiating swap of " + storeName + " with dataDir:" + dataDir);
    AdminStoreSwapper swapper = new AdminStoreSwapper(cluster, executor, client, httpTimeoutMs,
            swapConf.getRollback(), swapConf.getRollback());
    swapper.swapStoreData(storeName, dataDir, pushVersion);
    info("Swap complete.");
    executor.shutdownNow();
    executor.awaitTermination(10, TimeUnit.SECONDS);
}
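The protocol/port replacement above locates the scheme and port by splitting the qualified path string on ':'. A less brittle alternative (a hypothetical sketch, not from the Voldemort source) rebuilds the URI with java.net.URI:

import java.net.URI;
import java.net.URISyntaxException;

public class UriSwapSketch {
    // Hypothetical helper: swap the scheme and port of a qualified path string.
    static String replaceSchemeAndPort(String qualified, String newScheme, int newPort)
            throws URISyntaxException {
        URI u = URI.create(qualified);
        return new URI(newScheme, u.getUserInfo(), u.getHost(), newPort,
                u.getPath(), u.getQuery(), u.getFragment()).toString();
    }

    public static void main(String[] args) throws URISyntaxException {
        System.out.println(replaceSchemeAndPort("hdfs://namenode:8020/data/store", "webhdfs", 50070));
        // prints: webhdfs://namenode:50070/data/store
    }
}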
From source file:voldemort.store.readonly.mr.HadoopStoreJobRunner.java
License:Apache License
private static void addDepJars(Configuration conf, Class<?>[] deps, List<String> additionalJars)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> depJars = new HashSet<String>();
    for (Class<?> dep : deps) {
        String tmp = findInClasspath(dep.getCanonicalName());
        if (tmp != null) {
            Path path = new Path(tmp);
            depJars.add(path.makeQualified(localFs).toString());
        }
    }
    for (String additional : additionalJars) {
        Path path = new Path(additional);
        depJars.add(path.makeQualified(localFs).toString());
    }
    String[] tmpjars = conf.get("tmpjars", "").split(",");
    for (String tmpjar : tmpjars) {
        if (!StringUtils.isEmpty(tmpjar)) {
            depJars.add(tmpjar.trim());
        }
    }
    conf.set("tmpjars", StringUtils.join(depJars.iterator(), ','));
}
From source file:voldemort.store.readwrite.mr.HadoopRWStoreJobRunner.java
License:Apache License
public static void addDepJars(Configuration conf, Class<?>[] deps, List<String> additionalJars)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> depJars = new HashSet<String>();
    for (Class<?> dep : deps) {
        String tmp = findInClasspath(dep.getCanonicalName());
        if (tmp != null) {
            Path path = new Path(tmp);
            depJars.add(path.makeQualified(localFs).toString());
        }
    }
    for (String additional : additionalJars) {
        Path path = new Path(additional);
        depJars.add(path.makeQualified(localFs).toString());
    }
    String[] tmpjars = conf.get("tmpjars", "").split(",");
    for (String tmpjar : tmpjars) {
        if (!StringUtils.isEmpty(tmpjar)) {
            depJars.add(tmpjar.trim());
        }
    }
    conf.set("tmpjars", StringUtils.join(depJars.iterator(), ','));
}
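The two runners above share an identical helper: each dependency jar is qualified against the local file system and merged into the job's "tmpjars" list. A minimal usage sketch (the dependency class chosen here is illustrative):

Configuration conf = new Configuration();
Class<?>[] deps = { voldemort.cluster.Cluster.class }; // illustrative dependency
HadoopRWStoreJobRunner.addDepJars(conf, deps, new java.util.ArrayList<String>());
// conf.get("tmpjars") now holds the qualified, comma-separated jar paths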
From source file:weka.distributed.spark.CanopyClustererSparkJob.java
License:Open Source License
protected void writeCanopyAssignments(String outputPath, JavaPairRDD<Integer, ?> assignments)
        throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    // qualify the path against the target file system (note: the write below
    // still passes the unqualified pathOnly[0] together with the matching conf)
    p = p.makeQualified(fs);
    assignments.saveAsNewAPIHadoopFile(pathOnly[0], NullWritable.class, Text.class,
            SparkJob.NoKeyTextOutputFormat.class, conf);
}
From source file:weka.distributed.spark.RandomizedDataSparkJob.java
License:Open Source License
/**
 * Writes the randomized splits out to the output directory. Spark ensures
 * that each key goes to a separate part-xxxxx file in the output.
 *
 * @param outputPath the path to write to
 * @param sortedByFold the RDD containing the sorted dataset keyed by fold
 * @throws IOException if a problem occurs
 */
protected void writeRandomizedSplits(String outputPath, JavaRDD<Instance> sortedByFold)
        throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);
    if (!p.toString().toLowerCase().startsWith("file:/")) {
        m_finalOutputDirectory = p.toString();
    } else {
        m_finalOutputDirectory = pathOnly[0];
    }

    if (m_saveTextualSplits) {
        sortedByFold.saveAsTextFile(m_finalOutputDirectory);
    } else {
        sortedByFold.saveAsObjectFile(m_finalOutputDirectory);
    }

    // sortedByFold.saveAsNewAPIHadoopFile(pathOnly[0], NullWritable.class,
    // Text.class, SparkJob.NoKeyTextOutputFormat.class, conf);
}
From source file:weka.distributed.spark.RandomizedDataSparkJob.java
License:Open Source License
protected void writeRandomizedSplits(String outputPath, JavaPairRDD<Integer, Object> sortedByFold)
        throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);
    if (!p.toString().toLowerCase().startsWith("file:/")) {
        m_finalOutputDirectory = p.toString();
    } else {
        m_finalOutputDirectory = pathOnly[0];
    }

    // sortedByFold.saveAsNewAPIHadoopFile(pathOnly[0], NullWritable.class,
    // Text.class, SparkJob.NoKeyTextOutputFormat.class, conf);
    // sortedByFold.saveAsNewAPIHadoopFile(pathOnly[0], IntWritable.class,
    // Text.class, TextOutputFormat.class, conf);
}
From source file:weka.distributed.spark.SparkUtils.java
License:Open Source License
/**
 * Takes an input path and returns a fully qualified absolute one. Handles
 * local and non-local file systems. The original path can be a relative one.
 * In the case of a local file system, the relative path (relative to the
 * working directory) is made into an absolute one. In the case of a file
 * system such as HDFS, an absolute path requires an additional '/' - e.g.
 * {@code hdfs://host:port//users/fred/input} - otherwise it will be treated
 * as relative (to the user's home directory in HDFS). In either case, the
 * returned path will be an absolute one.
 *
 * @param original original path (either relative or absolute) on a file
 *          system
 * @return absolute path
 * @throws IOException if a problem occurs
 */
public static String resolveLocalOrOtherFileSystemPath(String original) throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(original, pathOnly);
    String result;
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);
    if (!p.toString().toLowerCase().startsWith("file:/")) {
        result = p.toString();
    } else {
        result = pathOnly[0];
    }
    return result;
}
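A usage sketch for the helper above (the paths are illustrative):

// A relative local path is resolved against the working directory:
String local = SparkUtils.resolveLocalOrOtherFileSystemPath("output/scored");

// An HDFS path needs the extra '/' to be treated as absolute:
String remote = SparkUtils.resolveLocalOrOtherFileSystemPath("hdfs://namenode:8020//user/fred/output");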
From source file:weka.distributed.spark.WekaScoringSparkJob.java
License:Open Source License
/**
 * Write the supplied data to the output directory
 *
 * @param outputPath the path to write to
 * @param scoredData the data to write
 * @throws IOException if a problem occurs
 */
protected void writeScoredData(String outputPath, JavaRDD<Instance> scoredData) throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);
    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);
    String finalOutputDir = "";
    if (!p.toString().toLowerCase().startsWith("file:/")) {
        finalOutputDir = p.toString();
    } else {
        finalOutputDir = pathOnly[0];
    }
    scoredData.saveAsTextFile(finalOutputDir);

    // write the header of the scored data out to the output directory in both
    // ARFF and CSV format
    String outArff = outputPath + "/scoredHeader.arff";
    String outCSV = outputPath + "/scoredHeader.csv";
    PrintWriter writer = null;
    try {
        writer = openTextFileForWrite(outArff);
        writer.println(m_scoredOutputFormat);
        writer.flush();
        writer.close();

        writer = openTextFileForWrite(outCSV);
        StringBuilder b = new StringBuilder();
        for (int i = 0; i < m_scoredOutputFormat.numAttributes(); i++) {
            b.append(m_scoredOutputFormat.attribute(i).name()).append(",");
        }
        b.setLength(b.length() - 1);
        writer.println(b.toString());
        writer.flush();
        writer.close();
    } finally {
        if (writer != null) {
            writer.close();
        }
    }
}