List of usage examples for org.apache.hadoop.fs FileSystem setPermission
public void setPermission(Path p, FsPermission permission) throws IOException
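Before the project-specific examples below, a minimal sketch of the call itself. The /tmp/example path and the 0755 mode are illustrative, not taken from any source on this page; on most FileSystem implementations the call fails with FileNotFoundException if the path does not exist.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class SetPermissionSketch {
    public static void main(String[] args) throws Exception {
        // Obtain the FileSystem named by fs.defaultFS in the active configuration.
        FileSystem fs = FileSystem.get(new Configuration());
        // Apply rwxr-xr-x (octal 0755) to an existing path.
        fs.setPermission(new Path("/tmp/example"), new FsPermission((short) 0755));
    }
}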
From source file:gaffer.accumulostore.operation.hdfs.handler.tool.ImportElementsToAccumulo.java
License:Apache License
@Override
public int run(final String[] strings) throws Exception {
    // Hadoop configuration
    final Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);

    // Make the failure directory
    fs.mkdirs(operation.getFailurePath());
    fs.setPermission(operation.getFailurePath(), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));

    // Remove the _SUCCESS file to prevent warning in accumulo
    fs.delete(new Path(operation.getOutputPath().toString() + "/_SUCCESS"), false);

    // Set all permissions
    IngestUtils.setDirectoryPermsForAccumulo(fs, operation.getOutputPath());

    // Import the files
    connector.tableOperations().importDirectory(table, operation.getOutputPath().toString(),
            operation.getFailurePath().toString(), false);

    return SUCCESS_RESPONSE;
}
From source file:gaffer.accumulostore.utils.IngestUtils.java
License:Apache License
/**
 * Modify the permissions on a directory and its contents to allow Accumulo access.
 *
 * @param fs      the FileSystem containing the directory
 * @param dirPath the Path to the directory
 * @throws IOException for any IO issues interacting with the file system.
 */
public static void setDirectoryPermsForAccumulo(final FileSystem fs, final Path dirPath) throws IOException {
    if (!fs.getFileStatus(dirPath).isDirectory()) {
        throw new RuntimeException(dirPath + " is not a directory");
    }
    LOGGER.info("Setting permission {} on directory {} and all files within", ACC_DIR_PERMS, dirPath);
    fs.setPermission(dirPath, ACC_DIR_PERMS);
    for (final FileStatus file : fs.listStatus(dirPath)) {
        fs.setPermission(file.getPath(), ACC_FILE_PERMS);
    }
}
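An illustrative call site for the helper above, assuming only what its javadoc states; the /data/bulk-import path is a made-up example:

import gaffer.accumulostore.utils.IngestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IngestUtilsSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Opens the directory and its files up for Accumulo before a bulk import;
        // throws RuntimeException if the path is not a directory.
        IngestUtils.setDirectoryPermsForAccumulo(fs, new Path("/data/bulk-import"));
    }
}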
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
private void ensureDirectoryExists(FileSystem fs, Path path,
        Iterator<OwnerAndPermission> ownerAndPermissionIterator) throws IOException {
    if (fs.exists(path)) {
        return;
    }
    if (ownerAndPermissionIterator.hasNext()) {
        OwnerAndPermission ownerAndPermission = ownerAndPermissionIterator.next();
        if (path.getParent() != null) {
            ensureDirectoryExists(fs, path.getParent(), ownerAndPermissionIterator);
        }
        if (!fs.mkdirs(path)) {
            // fs.mkdirs returns false if path already existed. Do not overwrite permissions
            return;
        }
        if (ownerAndPermission.getFsPermission() != null) {
            log.debug("Applying permissions {} to path {}.", ownerAndPermission.getFsPermission(), path);
            fs.setPermission(path, addExecutePermissionToOwner(ownerAndPermission.getFsPermission()));
        }
        String group = ownerAndPermission.getGroup();
        String owner = ownerAndPermission.getOwner();
        if (group != null || owner != null) {
            log.debug("Applying owner {} and group {} to path {}.", owner, group, path);
            fs.setOwner(path, owner, group);
        }
    } else {
        fs.mkdirs(path);
    }
}
From source file:gobblin.util.AvroUtils.java
License:Apache License
public static void writeSchemaToFile(Schema schema, Path filePath, FileSystem fs, boolean overwrite,
        FsPermission perm) throws IOException {
    if (!overwrite) {
        Preconditions.checkState(!fs.exists(filePath), filePath + " already exists");
    } else {
        HadoopUtils.deletePath(fs, filePath, true);
    }
    try (DataOutputStream dos = fs.create(filePath)) {
        dos.writeChars(schema.toString());
    }
    fs.setPermission(filePath, perm);
}
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/**
 * Try to set the owner and permissions for the path. Will not throw an exception.
 */
public static void setPermissions(Path location, Optional<String> owner, Optional<String> group, FileSystem fs,
        FsPermission permission) {
    try {
        if (!owner.isPresent()) {
            return;
        }
        if (!group.isPresent()) {
            return;
        }
        fs.setOwner(location, owner.get(), group.get());
        fs.setPermission(location, permission);
        if (!fs.isDirectory(location)) {
            return;
        }
        for (FileStatus fileStatus : fs.listStatus(location)) {
            setPermissions(fileStatus.getPath(), owner, group, fs, permission);
        }
    } catch (IOException e) {
        log.warn("Exception occurred while trying to change permissions : " + e.getMessage());
    }
}
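A hedged sketch of calling the helper above. The signature appears to use Guava's Optional; the owner, group, path, and mode below are assumptions for illustration only:

import com.google.common.base.Optional;
import gobblin.util.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class SetPermissionsSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Recursively applies owner "etl", group "hadoop" and mode 0750 under
        // /data/output; IOExceptions are logged and swallowed by the helper.
        HadoopUtils.setPermissions(new Path("/data/output"), Optional.of("etl"), Optional.of("hadoop"),
                fs, new FsPermission((short) 0750));
    }
}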
From source file:gobblin.util.WriterUtils.java
License:Apache License
/**
 * Create the given dir as well as all missing ancestor dirs. All created dirs will have the given permission.
 * This should be used instead of {@link FileSystem#mkdirs(Path, FsPermission)}, since that method only sets
 * the permission for the given dir, and not recursively for the ancestor dirs.
 *
 * @param fs   FileSystem
 * @param path The dir to be created
 * @param perm The permission to be set
 * @throws IOException if failing to create dir or set permission.
 */
public static void mkdirsWithRecursivePermission(FileSystem fs, Path path, FsPermission perm)
        throws IOException {
    if (fs.exists(path)) {
        return;
    }
    if (path.getParent() != null && !fs.exists(path.getParent())) {
        mkdirsWithRecursivePermission(fs, path.getParent(), perm);
    }
    if (!fs.mkdirs(path, perm)) {
        throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm));
    }
    // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly
    if (!fs.getFileStatus(path).getPermission().equals(perm)) {
        fs.setPermission(path, perm);
    }
}
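As the javadoc above notes, FileSystem#mkdirs(Path, FsPermission) is not guaranteed to apply the permission to ancestor directories it creates along the way, which is exactly what this helper adds. An illustrative sketch; the /a/b/c path and 0700 mode are assumptions:

import gobblin.util.WriterUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FsPermission perm = new FsPermission((short) 0700);
        // Creates /a, /a/b and /a/b/c as needed; each missing level is created
        // with perm (and re-chmodded if the filesystem did not honour it),
        // rather than only the leaf directory /a/b/c.
        WriterUtils.mkdirsWithRecursivePermission(fs, new Path("/a/b/c"), perm);
    }
}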
From source file:io.hops.experiments.utils.DFSOperationsUtils.java
License:Apache License
public static void chmodPath(FileSystem dfs, String pathStr) throws IOException {
    if (SERVER_LESS_MODE) {
        serverLessModeRandomWait();
        return;
    }
    dfs.setPermission(new Path(pathStr), new FsPermission((short) 0777));
}
From source file:io.hops.hopsworks.common.jobs.flink.AbstractYarnClusterDescriptor.java
License:Apache License
/**
 * This method will block until the ApplicationMaster/JobManager have been
 * deployed on YARN.
 */
protected YarnClusterClient deployInternal() throws Exception {
    isReadyForDeployment();

    LOG.info("Using values:");
    LOG.info("\tTaskManager count = {}", taskManagerCount);
    LOG.info("\tJobManager memory = {}", jobManagerMemoryMb);
    LOG.info("\tTaskManager memory = {}", taskManagerMemoryMb);

    final YarnClient yarnClient = getYarnClient();

    // ------------------ Check if the specified queue exists --------------------
    try {
        List<QueueInfo> queues = yarnClient.getAllQueues();
        // check only if there are queues configured in yarn and for this session.
        if (queues.size() > 0 && this.yarnQueue != null) {
            boolean queueFound = false;
            for (QueueInfo queue : queues) {
                if (queue.getQueueName().equals(this.yarnQueue)) {
                    queueFound = true;
                    break;
                }
            }
            if (!queueFound) {
                String queueNames = "";
                for (QueueInfo queue : queues) {
                    queueNames += queue.getQueueName() + ", ";
                }
                LOG.warn("The specified queue '" + this.yarnQueue + "' does not exist. "
                        + "Available queues: " + queueNames);
            }
        } else {
            LOG.debug("The YARN cluster does not have any queues configured");
        }
    } catch (Throwable e) {
        LOG.warn("Error while getting queue information from YARN: " + e.getMessage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error details", e);
        }
    }

    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

    Map<String, String> jobSystemProperties = new HashMap<>(2);

    // Certificates are materialized locally so DFSClient can be set to null
    // LocalResources are not used by Flink, so set it null
    HopsUtils.copyUserKafkaCerts(services.getUserCerts(), project, username,
            services.getSettings().getHopsworksTmpCertDir(), services.getSettings().getHdfsTmpCertDir(),
            JobType.FLINK, null, null, jobSystemProperties,
            services.getSettings().getFlinkKafkaCertDir(), appResponse.getApplicationId().toString());

    StringBuilder tmpBuilder = new StringBuilder();
    for (Map.Entry<String, String> prop : jobSystemProperties.entrySet()) {
        String option = YarnRunner.escapeForShell("-D" + prop.getKey() + "=" + prop.getValue());
        javaOptions.add(option);
        addHopsworksParam(option);
        tmpBuilder.append(prop.getKey()).append("=").append(prop.getValue()).append("@@");
    }
    dynamicPropertiesEncoded += tmpBuilder.toString();

    // ------------------ Add dynamic properties to local flinkConfiguraton ------
    Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
    for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
        flinkConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
    }

    // ------------------ Set default file system scheme -------------------------
    try {
        org.apache.flink.core.fs.FileSystem.setDefaultScheme(flinkConfiguration);
    } catch (IOException e) {
        throw new IOException("Error while setting the default filesystem scheme from configuration.", e);
    }

    // initialize file system
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    final FileSystem fs = FileSystem.get(conf);

    // hard coded check for the GoogleHDFS client because it's not overriding the getScheme() method.
    if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") && fs.getScheme().startsWith("file")) {
        LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
                + "specified Hadoop configuration path is wrong and the system is "
                + "using the default Hadoop configuration values. The Flink YARN "
                + "client needs to store its files in a distributed file system");
    }

    // ------ Check if the YARN ClusterClient has the requested resources ---

    // the yarnMinAllocationMB specifies the smallest possible container
    // allocation size. all allocations below this value are automatically
    // set to this value.
    final int yarnMinAllocationMB = conf.getInt("yarn.scheduler.minimum-allocation-mb", 0);
    if (jobManagerMemoryMb < yarnMinAllocationMB || taskManagerMemoryMb < yarnMinAllocationMB) {
        LOG.warn("The JobManager or TaskManager memory is below the smallest possible "
                + "YARN Container size. The value of 'yarn.scheduler.minimum-allocation-mb'"
                + " is " + yarnMinAllocationMB + "'. Please increase the memory size."
                + "YARN will allocate the smaller containers but the scheduler will"
                + " account for the minimum-allocation-mb, maybe not all instances "
                + "you requested will start.");
    }

    // set the memory to minAllocationMB to do the next checks correctly
    if (jobManagerMemoryMb < yarnMinAllocationMB) {
        jobManagerMemoryMb = yarnMinAllocationMB;
    }
    if (taskManagerMemoryMb < yarnMinAllocationMB) {
        taskManagerMemoryMb = yarnMinAllocationMB;
    }

    Resource maxRes = appResponse.getMaximumResourceCapability();
    final String NOTE = "Please check the 'yarn.scheduler.maximum-allocation-mb' and the "
            + "'yarn.nodemanager.resource.memory-mb' configuration values\n";
    if (jobManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the JobManager available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + "MB Requested: " + jobManagerMemoryMb
                        + "MB. " + NOTE);
    }
    if (taskManagerMemoryMb > maxRes.getMemory()) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException(
                "The cluster does not have the requested resources for the TaskManagers available!\n"
                        + "Maximum Memory: " + maxRes.getMemory() + " Requested: " + taskManagerMemoryMb
                        + "MB. " + NOTE);
    }

    final String NOTE_RSC = "\nThe Flink YARN client will try to allocate the YARN session, "
            + "but maybe not all TaskManagers are connecting from the beginning "
            + "because the resources are currently not available in the cluster. "
            + "The allocation might take more time than usual because the Flink "
            + "YARN client needs to wait until the resources become available.";
    int totalMemoryRequired = jobManagerMemoryMb + taskManagerMemoryMb * taskManagerCount;
    ClusterResourceDescription freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    if (freeClusterMem.totalFreeMemory < totalMemoryRequired) {
        LOG.warn("This YARN session requires " + totalMemoryRequired + "MB of memory in the cluster. "
                + "There are currently only " + freeClusterMem.totalFreeMemory + "MB available." + NOTE_RSC);
    }
    if (taskManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the TaskManagers (" + taskManagerMemoryMb
                + "MB) is more than the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }
    if (jobManagerMemoryMb > freeClusterMem.containerLimit) {
        LOG.warn("The requested amount of memory for the JobManager (" + jobManagerMemoryMb
                + "MB) is more than the largest possible YARN container: " + freeClusterMem.containerLimit
                + NOTE_RSC);
    }

    // ----------------- check if the requested containers fit into the cluster.
    int[] nmFree = Arrays.copyOf(freeClusterMem.nodeManagersFree, freeClusterMem.nodeManagersFree.length);
    // first, allocate the jobManager somewhere.
    if (!allocateResource(nmFree, jobManagerMemoryMb)) {
        LOG.warn("Unable to find a NodeManager that can fit the JobManager/Application master. "
                + "The JobManager requires " + jobManagerMemoryMb + "MB. NodeManagers available: "
                + Arrays.toString(freeClusterMem.nodeManagersFree) + NOTE_RSC);
    }
    // allocate TaskManagers
    for (int i = 0; i < taskManagerCount; i++) {
        if (!allocateResource(nmFree, taskManagerMemoryMb)) {
            LOG.warn("There is not enough memory available in the YARN cluster. "
                    + "The TaskManager(s) require " + taskManagerMemoryMb + "MB each. "
                    + "NodeManagers available: " + Arrays.toString(freeClusterMem.nodeManagersFree) + "\n"
                    + "After allocating the JobManager (" + jobManagerMemoryMb + "MB) and (" + i + "/"
                    + taskManagerCount + ") TaskManagers, the following NodeManagers are available: "
                    + Arrays.toString(nmFree) + NOTE_RSC);
        }
    }

    Set<File> effectiveShipFiles = new HashSet<>(shipFiles.size());
    for (File file : shipFiles) {
        effectiveShipFiles.add(file.getAbsoluteFile());
    }

    // check if there is a logback or log4j file
    File logbackFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOGBACK_NAME);
    final boolean hasLogback = logbackFile.exists();
    if (hasLogback) {
        effectiveShipFiles.add(logbackFile);
    }

    File log4jFile = new File(configurationDirectory + File.separator + CONFIG_FILE_LOG4J_NAME);
    final boolean hasLog4j = log4jFile.exists();
    if (hasLog4j) {
        effectiveShipFiles.add(log4jFile);
        if (hasLogback) {
            // this means there is already a logback configuration file --> fail
            LOG.warn("The configuration directory ('" + configurationDirectory + "') contains both LOG4J and "
                    + "Logback configuration files. Please delete or rename one of them.");
        }
    }

    addLibFolderToShipFiles(effectiveShipFiles);

    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(hasLogback, hasLog4j);

    // Set-up ApplicationSubmissionContext for the application
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    final ApplicationId appId = appContext.getApplicationId();

    // ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
    String zkNamespace = getZookeeperNamespace();
    // no user specified cli argument for namespace?
    if (zkNamespace == null || zkNamespace.isEmpty()) {
        // namespace defined in config? else use applicationId as default.
        zkNamespace = flinkConfiguration.getString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY,
                String.valueOf(appId));
        setZookeeperNamespace(zkNamespace);
    }
    flinkConfiguration.setString(ConfigConstants.ZOOKEEPER_NAMESPACE_KEY, zkNamespace);

    if (RecoveryMode.isHighAvailabilityModeActivated(flinkConfiguration)) {
        // activate re-execution of failed applications
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
        activateHighAvailabilitySupport(appContext);
    } else {
        // set number of application retries to 1 in the default case
        appContext.setMaxAppAttempts(flinkConfiguration.getInteger(ConfigConstants.YARN_APPLICATION_ATTEMPTS, 1));
    }

    // local resource map for Yarn
    final Map<String, LocalResource> localResources = new HashMap<>(2 + effectiveShipFiles.size());
    // list of remote paths (after upload)
    final List<Path> paths = new ArrayList<>(2 + effectiveShipFiles.size());
    // classpath assembler
    final StringBuilder classPathBuilder = new StringBuilder();
    // ship list that enables reuse of resources for task manager containers
    StringBuilder envShipFileList = new StringBuilder();

    // upload and register ship files
    for (File shipFile : effectiveShipFiles) {
        LocalResource shipResources = Records.newRecord(LocalResource.class);
        Path shipLocalPath = new Path("file://" + shipFile.getAbsolutePath());
        Path remotePath = Utils.setupLocalResource(fs, appId.toString(), shipLocalPath, shipResources,
                fs.getHomeDirectory());
        paths.add(remotePath);
        localResources.put(shipFile.getName(), shipResources);
        classPathBuilder.append(shipFile.getName());
        if (shipFile.isDirectory()) {
            // add directories to the classpath
            classPathBuilder.append(File.separator).append("*");
        }
        classPathBuilder.append(File.pathSeparator);
        envShipFileList.append(remotePath).append(",");
    }

    ////////////////////////////////////////////////////////////////////////////
    /*
     * Add Hops LocalResources paths here
     */
    // Add it to localResources
    for (Entry<String, LocalResource> entry : hopsworksResources.entrySet()) {
        localResources.put(entry.getKey(), entry.getValue());
        // Append name to classPathBuilder
        classPathBuilder.append(entry.getKey());
        classPathBuilder.append(File.pathSeparator);
    }
    ////////////////////////////////////////////////////////////////////////////

    // Setup jar for ApplicationMaster
    LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    LocalResource flinkConf = Records.newRecord(LocalResource.class);
    Path remotePathJar = Utils.setupLocalResource(fs, appId.toString(), flinkJarPath, appMasterJar,
            fs.getHomeDirectory());
    Path remotePathConf = Utils.setupLocalResource(fs, appId.toString(), flinkConfigurationPath, flinkConf,
            fs.getHomeDirectory());
    localResources.put("flink.jar", appMasterJar);
    localResources.put("flink-conf.yaml", flinkConf);
    paths.add(remotePathJar);
    classPathBuilder.append("flink.jar").append(File.pathSeparator);
    paths.add(remotePathConf);
    classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);

    sessionFilesDir = new Path(fs.getHomeDirectory(), ".flink/" + appId.toString() + "/");
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE);
    fs.setPermission(sessionFilesDir, permission); // set permission for path.

    // setup security tokens
    Utils.setTokensFor(amContainer, paths, conf);

    amContainer.setLocalResources(localResources);
    fs.close();

    // Setup CLASSPATH and environment variables for ApplicationMaster
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(Utils.getEnvironmentVariables(ConfigConstants.YARN_APPLICATION_MASTER_ENV_PREFIX,
            flinkConfiguration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());

    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.ENV_TM_COUNT, String.valueOf(taskManagerCount));
    appMasterEnv.put(YarnConfigKeys.ENV_TM_MEMORY, String.valueOf(taskManagerMemoryMb));
    appMasterEnv.put(YarnConfigKeys.FLINK_JAR_PATH, remotePathJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fs.getHomeDirectory().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, envShipFileList.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_USERNAME,
            UserGroupInformation.getCurrentUser().getShortUserName());
    appMasterEnv.put(YarnConfigKeys.ENV_SLOTS, String.valueOf(slots));
    appMasterEnv.put(YarnConfigKeys.ENV_DETACHED, String.valueOf(detached));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());

    if (dynamicPropertiesEncoded != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_DYNAMIC_PROPERTIES, dynamicPropertiesEncoded);
    }

    // set classpath from YARN configuration
    Utils.setupYarnClassPath(conf, appMasterEnv);

    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(jobManagerMemoryMb);
    capability.setVirtualCores(1);

    String name;
    if (customName == null) {
        name = "Flink session with " + taskManagerCount + " TaskManagers";
        if (detached) {
            name += " (detached)";
        }
    } else {
        name = customName;
    }

    appContext.setApplicationName(name); // application name
    appContext.setApplicationType("Apache Flink");
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    if (yarnQueue != null) {
        appContext.setQueue(yarnQueue);
    }

    // add a hook to clean up in case deployment fails
    Thread deploymentFailureHook = new DeploymentFailureHook(yarnClient, yarnApplication);
    Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
    LOG.info("Submitting application master " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for the cluster to be allocated");
    final long startTime = System.currentTimeMillis();
    ApplicationReport report;
    YarnApplicationState lastAppState = YarnApplicationState.NEW;
    loop: while (true) {
        try {
            report = yarnClient.getApplicationReport(appId);
        } catch (IOException e) {
            throw new YarnDeploymentException("Failed to deploy the cluster: " + e.getMessage());
        }
        YarnApplicationState appState = report.getYarnApplicationState();
        switch (appState) {
        case FAILED:
        case FINISHED:
        case KILLED:
            throw new YarnDeploymentException("The YARN application unexpectedly switched to state " + appState
                    + " during deployment. \n" + "Diagnostics from YARN: " + report.getDiagnostics() + "\n"
                    + "If log aggregation is enabled on your cluster, use this "
                    + "command to further investigate the issue:\n" + "yarn logs -applicationId " + appId);
            // break ..
        case RUNNING:
            LOG.info("YARN application has been deployed successfully.");
            break loop;
        default:
            if (appState != lastAppState) {
                LOG.info("Deploying cluster, current state " + appState);
            }
            if (System.currentTimeMillis() - startTime > 60000) {
                LOG.info("Deployment took more than 60 seconds. Please check if the "
                        + "requested resources are available in the YARN cluster");
            }
        }
        lastAppState = appState;
        Thread.sleep(250);
    }
    // print the application id for user to cancel themselves.
    if (isDetachedMode()) {
        LOG.info("The Flink YARN client has been started in detached mode. In order to stop "
                + "Flink on YARN, use the following command or a YARN web interface to stop "
                + "it:\nyarn application -kill " + appId + "\nPlease also note that the "
                + "temporary files of the YARN session in the home directory will not be removed.");
    }
    // since deployment was successful, remove the hook
    try {
        Runtime.getRuntime().removeShutdownHook(deploymentFailureHook);
    } catch (IllegalStateException e) {
        // we're already in the shut down hook.
    }

    String host = report.getHost();
    int port = report.getRpcPort();

    // Correctly initialize the Flink config
    flinkConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, host);
    flinkConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, port);

    // the Flink cluster is deployed in YARN. Represent cluster
    return createYarnClusterClient(this, yarnClient, report, flinkConfiguration, sessionFilesDir, true);
}
From source file:mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool.java
License:Apache License
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    try {
        // conf
        zk = conf.get(MRUtils.AC_ZK_PROP, zk);
        ttl = conf.get(MRUtils.AC_TTL_PROP, ttl);
        instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
        userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
        pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
        workDirBase = conf.get(WORKDIR_PROP, workDirBase);
        format = conf.get(MRUtils.FORMAT_PROP, format);
        conf.set(MRUtils.FORMAT_PROP, format);
        final String inputDir = args[0];

        ZooKeeperInstance zooKeeperInstance = new ZooKeeperInstance(instance, zk);
        Connector connector = zooKeeperInstance.getConnector(userName, new PasswordToken(pwd));
        TableOperations tableOperations = connector.tableOperations();

        if (conf.get(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS) != null) {
            throw new IllegalArgumentException("Cannot use Bulk N Trips tool with Additional Indexers");
        }

        String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
        if (tablePrefix != null) {
            RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
        }
        String[] tables = { tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX };

        Collection<Job> jobs = new ArrayList<Job>();
        for (final String tableName : tables) {
            PrintStream out = null;
            try {
                String workDir = workDirBase + "/" + tableName;
                System.out.println("Loading data into table[" + tableName + "]");

                Job job = new Job(new Configuration(conf),
                        "Bulk Ingest load data to Generic RDF Table[" + tableName + "]");
                job.setJarByClass(this.getClass());
                // setting long job
                Configuration jobConf = job.getConfiguration();
                jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
                jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256"));
                jobConf.setBoolean("mapred.compress.map.output", true);
                // jobConf.set("mapred.map.output.compression.codec",
                //         "org.apache.hadoop.io.compress.GzipCodec"); // TODO: I would like LZO compression

                job.setInputFormatClass(TextInputFormat.class);

                job.setMapperClass(ParseNtripsMapper.class);
                job.setMapOutputKeyClass(Key.class);
                job.setMapOutputValueClass(Value.class);

                job.setCombinerClass(OutStmtMutationsReducer.class);
                job.setReducerClass(OutStmtMutationsReducer.class);
                job.setOutputFormatClass(AccumuloFileOutputFormat.class);
                // AccumuloFileOutputFormat.setZooKeeperInstance(jobConf, instance, zk);

                jobConf.set(ParseNtripsMapper.TABLE_PROPERTY, tableName);

                TextInputFormat.setInputPaths(job, new Path(inputDir));

                FileSystem fs = FileSystem.get(conf);
                Path workPath = new Path(workDir);
                if (fs.exists(workPath)) {
                    fs.delete(workPath, true);
                }

                // make failures dir
                Path failures = new Path(workDir, "failures");
                fs.delete(failures, true);
                fs.mkdirs(new Path(workDir, "failures"));

                AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files"));

                out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))));

                if (!tableOperations.exists(tableName)) {
                    tableOperations.create(tableName);
                }
                Collection<Text> splits = tableOperations.getSplits(tableName, Integer.MAX_VALUE);
                for (Text split : splits) {
                    out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));
                }

                job.setNumReduceTasks(splits.size() + 1);
                out.close();

                job.setPartitionerClass(KeyRangePartitioner.class);
                RangePartitioner.setSplitFile(job, workDir + "/splits.txt");

                jobConf.set(WORKDIR_PROP, workDir);

                job.submit();
                jobs.add(job);
            } catch (Exception re) {
                throw new RuntimeException(re);
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }

        for (Job job : jobs) {
            while (!job.isComplete()) {
                Thread.sleep(1000);
            }
        }

        for (String tableName : tables) {
            String workDir = workDirBase + "/" + tableName;
            String filesDir = workDir + "/files";
            String failuresDir = workDir + "/failures";
            FileSystem fs = FileSystem.get(conf);

            // make sure that the "accumulo" user can read/write/execute these directories
            fs.setPermission(new Path(filesDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
            fs.setPermission(new Path(failuresDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));

            tableOperations.importDirectory(tableName, filesDir, failuresDir, false);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return 0;
}
From source file:org.apache.accumulo.server.master.recovery.MapRLogCloser.java
License:Apache License
@Override
public long close(AccumuloConfiguration conf, VolumeManager fs, Path path) throws IOException {
    log.info("Recovering file " + path.toString() + " by changing permission to readonly");
    FileSystem ns = fs.getVolumeByPath(path).getFileSystem();
    FsPermission roPerm = new FsPermission((short) 0444);
    try {
        ns.setPermission(path, roPerm);
        return 0;
    } catch (IOException ex) {
        log.error("error recovering lease ", ex);
        // lets do this again
        return 1000;
    }
}