List of usage examples for org.apache.hadoop.fs.Path.SEPARATOR
Field: public static final String SEPARATOR = "/"
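Path.SEPARATOR is the forward-slash separator string ("/") that Hadoop path strings use on every platform, which makes it the safe constant for composing child paths by plain string concatenation, as most of the examples below do. A minimal sketch of that idiom (the directory and file names are hypothetical):

import org.apache.hadoop.fs.Path;

public class PathSeparatorExample {
    public static void main(String[] args) {
        // Hypothetical job directory and file name, purely for illustration.
        String jobDir = "/tmp/mapred/job_0001";
        String splitFile = jobDir + Path.SEPARATOR + "job.split";

        System.out.println(splitFile);           // /tmp/mapred/job_0001/job.split
        System.out.println(new Path(splitFile)); // the same location as a Hadoop Path
    }
}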
From source file: skewtune.mapreduce.STJobTracker.java
License: Apache License

public static String getLocalSplitFile(String user, String jobid) {
    return getLocalJobDir(user, jobid) + Path.SEPARATOR + LOCAL_SPLIT_FILE;
}
From source file: skewtune.mapreduce.STJobTracker.java
License: Apache License

public static String getLocalSplitMetaFile(String user, String jobid) {
    return getLocalJobDir(user, jobid) + Path.SEPARATOR + LOCAL_SPLIT_META_FILE;
}
From source file: tajo.engine.planner.physical.ExternalSortExec.java
License: Apache License

public ExternalSortExec(final TaskAttemptContext context, final StorageManager sm, final SortNode plan,
        final PhysicalExec child) throws IOException {
    super(context, plan.getInSchema(), plan.getOutSchema(), child);
    this.annotation = plan;
    this.sm = sm;

    this.SORT_BUFFER_SIZE = context.getConf().getIntVar(ConfVars.EXT_SORT_BUFFER);

    this.comparator = new TupleComparator(inSchema, plan.getSortKeys());
    this.tupleSlots = new ArrayList<>(SORT_BUFFER_SIZE);

    this.workDir = new Path(context.getWorkDir().toURI() + Path.SEPARATOR + UUID.randomUUID());
    this.localFS = FileSystem.getLocal(context.getConf());
    meta = TCatUtil.newTableMeta(inSchema, StoreType.ROWFILE);
}
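The constructor above appends Path.SEPARATOR and a random UUID to the task working directory's URI so that concurrent sort tasks on the same node each get a distinct scratch directory. A distilled sketch of that pattern, with a made-up base URI standing in for context.getWorkDir().toURI():

import java.util.UUID;

import org.apache.hadoop.fs.Path;

public class UniqueWorkDirExample {
    public static void main(String[] args) {
        // Hypothetical task working directory.
        String workBase = "file:/tmp/tajo/task_000001";

        // Appending a random UUID yields a scratch directory unique to this task.
        Path workDir = new Path(workBase + Path.SEPARATOR + UUID.randomUUID());
        System.out.println(workDir); // e.g. file:/tmp/tajo/task_000001/6f9c0a2e-...
    }
}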
From source file: test.hiveserver.parse.SemanticAnalyzer.java
License: Apache License

private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch)
        throws SemanticException {
    if (!qbp.isAnalyzeCommand()) {
        tsDesc.setGatherStats(false);
    } else {
        tsDesc.setGatherStats(true);

        // Append additional virtual columns for storing statistics.
        Iterator<VirtualColumn> vcs = VirtualColumn.getStatsRegistry(conf).iterator();
        List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
        while (vcs.hasNext()) {
            VirtualColumn vc = vcs.next();
            rwsch.put(alias, vc.getName(),
                    new ColumnInfo(vc.getName(), vc.getTypeInfo(), alias, true, vc.getIsHidden()));
            vcList.add(vc);
        }
        tsDesc.addVirtualCols(vcList);

        String tblName = tab.getTableName();
        org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec tblSpec = qbp.getTableSpec(alias);
        Map<String, String> partSpec = tblSpec.getPartSpec();
        if (partSpec != null) {
            List<String> cols = new ArrayList<String>();
            cols.addAll(partSpec.keySet());
            tsDesc.setPartColumns(cols);
        }

        // Theoretically the key prefix could be any unique string shared
        // between TableScanOperator (when publishing) and StatsTask (when
        // aggregating). Here we use table_name + partition_spec as the prefix
        // for ease of reading during explain and debugging. Currently, the
        // partition spec can only be a static partition.
        String k = tblName + Path.SEPARATOR;
        tsDesc.setStatsAggPrefix(k);

        // Set up a WriteEntity for replication.
        outputs.add(new WriteEntity(tab, true));

        // Add a WriteEntity for each matching partition.
        if (tab.isPartitioned()) {
            if (partSpec == null) {
                throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
            }
            List<Partition> partitions = qbp.getTableSpec().partitions;
            if (partitions != null) {
                for (Partition partn : partitions) {
                    // inputs.add(new ReadEntity(partn)); // is this needed at all?
                    outputs.add(new WriteEntity(partn, true));
                }
            }
        }
    }
}
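Here Path.SEPARATOR does not address a file at all: it merely delimits the statistics aggregation key prefix (table name, then partition spec) that the publishing and aggregating sides must agree on. A minimal sketch of what such a prefix looks like, with a hypothetical table name and static partition:

import org.apache.hadoop.fs.Path;

public class StatsPrefixExample {
    public static void main(String[] args) {
        // Hypothetical table name and static partition spec.
        String tblName = "page_views";
        String partSpec = "ds=2024-01-01";

        // The publisher and the aggregator only need to agree on this string key.
        String statsAggPrefix = tblName + Path.SEPARATOR;
        System.out.println(statsAggPrefix + partSpec); // page_views/ds=2024-01-01
    }
}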
From source file: testjar.GenerateTaskChildProcess.java
License: Apache License

/**
 * Creates the child processes for a task.
 * @param conf configuration for a job.
 * @param jobName the name of the mapper job.
 * @throws IOException if an I/O error occurs.
 */
private static void createChildProcess(JobConf conf, String jobName) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    File TMP_ROOT_DIR = new File("/tmp");
    String TEST_ROOT_DIR = TMP_ROOT_DIR.getAbsolutePath() + Path.SEPARATOR + "ChildProc_" + jobName;
    Path scriptDir = new Path(TEST_ROOT_DIR);
    int numOfChildProcesses = 2;
    if (fs.exists(scriptDir)) {
        fs.delete(scriptDir, true);
    }
    fs.mkdirs(scriptDir);
    fs.setPermission(scriptDir, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    String scriptDirName = scriptDir.toUri().getPath();
    Random rm = new Random();
    String scriptName = "ShellScript_" + jobName + "_" + rm.nextInt() + ".sh";
    Path scriptPath = new Path(scriptDirName, scriptName);
    String shellScript = scriptPath.toString();
    String script = null;
    if (jobName.equals("AppendStr")) {
        script = "#!/bin/sh\n"
                + "umask 000\n"
                + "StrVal=\"Hadoop is framework for data intensive "
                + "distributed applications.\"\n"
                + "StrVal=\"${StrVal}Hadoop enables applications to work "
                + "with thousands of nodes.\"\n"
                + "echo $StrVal\n"
                + "if [ \"X$1\" != \"X0\" ]\nthen\n"
                + " sh " + shellScript + " $(($1-1))\n"
                + "else\n"
                + " while(true)\n"
                + " do\n"
                + " StrVal=\"$StrVal Hadoop \"\n"
                + " done\n"
                + "fi";
    } else if (jobName.equals("DispStr")) {
        script = "#!/bin/sh\n"
                + "umask 000\n"
                + "msg=Welcome\n"
                + "echo $msg\n"
                + " if [ \"X$1\" != \"X0\" ]\nthen\n"
                + " sh " + shellScript + " $(($1-1))\n"
                + "else\n"
                + " while(true)\n"
                + " do\n"
                + " sleep 2 \n"
                + " done\n"
                + "fi";
    } else {
        script = "#!/bin/sh\n"
                + "umask 000\n"
                + "msg=Welcome\n"
                + "echo $msg\n"
                + " if [ \"X$1\" != \"X0\" ]\nthen\n"
                + " sh " + shellScript + " $(($1-1))\n"
                + "else\n"
                + " for count in {1..1000}\n"
                + " do\n"
                + " echo \"$msg_$count\" \n"
                + " done\n"
                + "fi";
    }
    DataOutputStream file = fs.create(scriptPath);
    file.writeBytes(script);
    file.close();
    File scriptFile = new File(scriptDirName, scriptName);
    scriptFile.setExecutable(true);
    LOG.info("script absolute path:" + scriptFile.getAbsolutePath());
    String[] cmd = new String[] { scriptFile.getAbsolutePath(), String.valueOf(numOfChildProcesses) };
    ShellCommandExecutor shellExec = new ShellCommandExecutor(cmd);
    shellExec.execute();
}
From source file: voldemort.store.readonly.mr.azkaban.VoldemortMultiStoreBuildAndPushJob.java
License: Apache License

@Override
public void run() throws Exception {
    // Mapping of Pair [ cluster url, store name ] to list of previous node
    // directories. Required for rollback.
    Multimap<Pair<String, String>, Pair<Integer, String>> previousNodeDirPerClusterStore = HashMultimap.create();

    // Retrieve filesystem information for checking if the folder exists
    final FileSystem fs = outputDir.getFileSystem(new Configuration());

    // Step 1) Order the stores depending on the size of the store
    TreeMap<Long, String> storeNameSortedBySize = Maps.newTreeMap();
    for (String storeName : storeNames) {
        storeNameSortedBySize.put(sizeOfPath(fs, inputDirsPerStore.get(storeName)), storeName);
    }

    log.info("Store names along with their input file sizes - " + storeNameSortedBySize);

    // This will collect it in ascending order of size
    this.storeNames = Lists.newArrayList(storeNameSortedBySize.values());

    // Reverse it such that it is in descending order of size
    Collections.reverse(this.storeNames);

    log.info("Store names in the order of which we'll run build and push - " + this.storeNames);

    // Step 2) Get the push version if set
    final long pushVersion = props.containsKey("push.version.timestamp")
            ? Long.parseLong(new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()))
            : props.getLong("push.version", -1L);

    // Mapping of Pair [ cluster url, store name ] to Future with list of node dirs
    HashMap<Pair<String, String>, Future<List<String>>> fetchDirsPerStoreCluster = Maps.newHashMap();

    // Store mapping of url to cluster metadata
    final ConcurrentHashMap<String, Cluster> urlToCluster = new ConcurrentHashMap<String, Cluster>();

    // Mapping of Pair [ cluster url, store name ] to list of node directories
    final HashMap<Pair<String, String>, List<String>> nodeDirPerClusterStore = new HashMap<Pair<String, String>, List<String>>();

    // Iterate over all of them and check if they are complete
    final HashMap<Pair<String, String>, Exception> exceptions = Maps.newHashMap();

    ExecutorService executor = null;
    try {
        executor = Executors.newFixedThreadPool(props.getInt("build.push.parallel", 1));

        // Step 3) Start the building + pushing of all stores in parallel
        for (final String storeName : storeNames) {
            // Go over every cluster and do the build phase
            for (int index = 0; index < clusterUrls.size(); index++) {
                final String url = clusterUrls.get(index);
                fetchDirsPerStoreCluster.put(Pair.create(url, storeName),
                        executor.submit(new Callable<List<String>>() {

                            @Override
                            public List<String> call() throws Exception {

                                log.info("========= Working on build + push phase for store '" + storeName
                                        + "' and cluster '" + url + "' ==========");

                                // Create an admin client which will be used by everyone
                                AdminClient adminClient = null;

                                // Executor inside executor - your mind just exploded!
                                ExecutorService internalExecutor = null;

                                try {
                                    // Retrieve admin client for verification of schema + pushing
                                    adminClient = new AdminClient(url, new AdminClientConfig());

                                    // Verify the store exists ( if not, add the store )
                                    Pair<StoreDefinition, Cluster> metadata = verifySchema(storeName, url,
                                            inputDirsPerStore.get(storeName), adminClient);

                                    // Populate the url to cluster metadata
                                    urlToCluster.put(url, metadata.getSecond());

                                    // Create output directory path
                                    URI uri = new URI(url);
                                    Path outputDirPath = new Path(outputDir + Path.SEPARATOR + storeName,
                                            uri.getHost());

                                    log.info("Running build phase for store '" + storeName + "' and url '"
                                            + url + "'. Reading from input directory '"
                                            + inputDirsPerStore.get(storeName) + "' and writing to "
                                            + outputDirPath);

                                    runBuildStore(metadata.getSecond(), metadata.getFirst(),
                                            inputDirsPerStore.get(storeName), outputDirPath);

                                    log.info("Finished running build phase for store " + storeName
                                            + " and url '" + url + "'. Written to directory "
                                            + outputDirPath);

                                    long storePushVersion = pushVersion;
                                    if (storePushVersion == -1L) {
                                        log.info("Retrieving version number for store '" + storeName
                                                + "' and cluster '" + url + "'");

                                        Map<String, Long> pushVersions = adminClient
                                                .getROMaxVersion(Lists.newArrayList(storeName));

                                        if (pushVersions == null || !pushVersions.containsKey(storeName)) {
                                            throw new RuntimeException("Could not retrieve version for store '"
                                                    + storeName + "'");
                                        }
                                        storePushVersion = pushVersions.get(storeName);
                                        storePushVersion++;

                                        log.info("Retrieved max version number for store '" + storeName
                                                + "' and cluster '" + url + "' = " + storePushVersion);
                                    }

                                    log.info("Running push for cluster url " + url);

                                    // Used for parallel pushing
                                    internalExecutor = Executors.newCachedThreadPool();

                                    AdminStoreSwapper swapper = new AdminStoreSwapper(metadata.getSecond(),
                                            internalExecutor, adminClient,
                                            1000 * props.getInt("timeout.seconds", 24 * 60 * 60), true, true);

                                    // Convert to hadoop specific path
                                    String outputDirPathString = outputDirPath.makeQualified(fs).toString();

                                    if (!fs.exists(outputDirPath)) {
                                        throw new RuntimeException("Output directory for store " + storeName
                                                + " and cluster '" + url + "' - " + outputDirPathString
                                                + " does not exist");
                                    }

                                    log.info("Pushing data to store '" + storeName + "' on cluster " + url
                                            + " from path " + outputDirPathString + " with version "
                                            + storePushVersion);

                                    List<String> nodeDirs = swapper.invokeFetch(storeName,
                                            outputDirPathString, storePushVersion);

                                    log.info("Successfully pushed data to store '" + storeName
                                            + "' on cluster " + url + " from path " + outputDirPathString
                                            + " with version " + storePushVersion);

                                    return nodeDirs;
                                } finally {
                                    if (internalExecutor != null) {
                                        internalExecutor.shutdownNow();
                                        internalExecutor.awaitTermination(10, TimeUnit.SECONDS);
                                    }
                                    if (adminClient != null) {
                                        adminClient.stop();
                                    }
                                }
                            }
                        }));
            }
        }

        for (final String storeName : storeNames) {
            for (int index = 0; index < clusterUrls.size(); index++) {
                Pair<String, String> key = Pair.create(clusterUrls.get(index), storeName);
                Future<List<String>> nodeDirs = fetchDirsPerStoreCluster.get(key);
                try {
                    nodeDirPerClusterStore.put(key, nodeDirs.get());
                } catch (Exception e) {
                    exceptions.put(key, e);
                }
            }
        }
    } finally {
        if (executor != null) {
            executor.shutdownNow();
            executor.awaitTermination(10, TimeUnit.SECONDS);
        }
    }

    // ===== If we got exceptions during the build + push, delete data from successful nodes =====
    if (!exceptions.isEmpty()) {

        log.error("Got an exception during pushes. Deleting data already pushed on successful nodes");

        for (int index = 0; index < clusterUrls.size(); index++) {
            String clusterUrl = clusterUrls.get(index);
            Cluster cluster = urlToCluster.get(clusterUrl);

            AdminClient adminClient = null;
            try {
                adminClient = new AdminClient(cluster, new AdminClientConfig());
                for (final String storeName : storeNames) {
                    // Check if the [ cluster url, store name ] pair succeeded; we need to roll it back
                    Pair<String, String> key = Pair.create(clusterUrl, storeName);

                    if (nodeDirPerClusterStore.containsKey(key)) {
                        List<String> nodeDirs = nodeDirPerClusterStore.get(key);

                        log.info("Deleting data for successful pushes to " + clusterUrl + " and store "
                                + storeName);
                        int nodeId = 0;
                        for (String nodeDir : nodeDirs) {
                            try {
                                log.info("Deleting data ( " + nodeDir + " ) for successful pushes to '"
                                        + clusterUrl + "' and store '" + storeName + "' and node " + nodeId);

                                adminClient.failedFetchStore(nodeId, storeName, nodeDir);

                                log.info("Successfully deleted data for successful pushes to '" + clusterUrl
                                        + "' and store '" + storeName + "' and node " + nodeId);
                            } catch (Exception e) {
                                log.error("Failure while deleting data on node " + nodeId + " for store '"
                                        + storeName + "' and url '" + clusterUrl + "'");
                            }
                            nodeId++;
                        }
                    }
                }
            } finally {
                if (adminClient != null) {
                    adminClient.stop();
                }
            }
        }

        int errorNo = 1;
        for (Pair<String, String> key : exceptions.keySet()) {
            log.error("Error no " + errorNo + "] Error pushing for cluster '" + key.getFirst()
                    + "' and store '" + key.getSecond() + "' :", exceptions.get(key));
            errorNo++;
        }

        throw new VoldemortException("Exception during build + push");
    }

    // ====== Delete the temporary directory since we don't require it ======
    if (!props.getBoolean("build.output.keep", false)) {
        JobConf jobConf = new JobConf();

        if (props.containsKey("hadoop.job.ugi")) {
            jobConf.set("hadoop.job.ugi", props.getString("hadoop.job.ugi"));
        }

        log.info("Deleting output directory since we have finished the pushes " + outputDir);
        HadoopUtils.deletePathIfExists(jobConf, outputDir.toString());
        log.info("Successfully deleted output directory since we have finished the pushes " + outputDir);
    }

    // ====== Time to swap the stores one node at a time ======
    try {
        for (int index = 0; index < clusterUrls.size(); index++) {
            String url = clusterUrls.get(index);
            Cluster cluster = urlToCluster.get(url);

            AdminClient adminClient = new AdminClient(cluster, new AdminClientConfig());

            log.info("Swapping all stores on cluster " + url);
            try {
                // Go over every node and swap
                for (Node node : cluster.getNodes()) {

                    log.info("Swapping all stores on cluster " + url + " and node " + node.getId());

                    // Go over every store and swap
                    for (String storeName : storeNames) {

                        Pair<String, String> key = Pair.create(url, storeName);
                        log.info("Swapping '" + storeName + "' store on cluster " + url + " and node "
                                + node.getId() + " - " + nodeDirPerClusterStore.get(key).get(node.getId()));

                        previousNodeDirPerClusterStore.put(key,
                                Pair.create(node.getId(),
                                        adminClient.swapStore(node.getId(), storeName,
                                                nodeDirPerClusterStore.get(key).get(node.getId()))));

                        log.info("Successfully swapped '" + storeName + "' store on cluster " + url
                                + " and node " + node.getId());
                    }
                }
            } finally {
                if (adminClient != null) {
                    adminClient.stop();
                }
            }
        }
    } catch (Exception e) {

        log.error("Got an exception during swaps. Rolling back data already pushed on successful nodes");

        for (Pair<String, String> clusterStoreTuple : previousNodeDirPerClusterStore.keySet()) {
            Collection<Pair<Integer, String>> nodeToPreviousDirs = previousNodeDirPerClusterStore
                    .get(clusterStoreTuple);
            String url = clusterStoreTuple.getFirst();
            Cluster cluster = urlToCluster.get(url);

            log.info("Rolling back for cluster " + url + " and store " + clusterStoreTuple.getSecond());

            AdminClient adminClient = new AdminClient(cluster, new AdminClientConfig());
            try {
                for (Pair<Integer, String> nodeToPreviousDir : nodeToPreviousDirs) {
                    log.info("Rolling back for cluster " + url + " and store "
                            + clusterStoreTuple.getSecond() + " and node " + nodeToPreviousDir.getFirst()
                            + " to dir " + nodeToPreviousDir.getSecond());

                    adminClient.rollbackStore(nodeToPreviousDir.getFirst(), nodeToPreviousDir.getSecond(),
                            ReadOnlyUtils.getVersionId(new File(nodeToPreviousDir.getSecond())));

                    log.info("Successfully rolled back for cluster " + url + " and store "
                            + clusterStoreTuple.getSecond() + " and node " + nodeToPreviousDir.getFirst()
                            + " to dir " + nodeToPreviousDir.getSecond());
                }
            } finally {
                if (adminClient != null) {
                    adminClient.stop();
                }
            }
        }

        throw e;
    }
}
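The build phase above composes its output path both ways: string concatenation with Path.SEPARATOR for the per-store subdirectory, and the two-argument Path(parent, child) constructor for the per-host leaf. A distilled sketch under assumed names:

import org.apache.hadoop.fs.Path;

public class StoreOutputDirExample {
    public static void main(String[] args) {
        // Hypothetical output root, store name, and cluster host.
        Path outputDir = new Path("/tmp/voldemort-build");
        String storeName = "test-store";
        String host = "node0.example.com";

        // Concatenation handles the store level; Path(parent, child) adds the host level.
        Path outputDirPath = new Path(outputDir + Path.SEPARATOR + storeName, host);
        System.out.println(outputDirPath); // /tmp/voldemort-build/test-store/node0.example.com
    }
}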