List of usage examples for org.apache.hadoop.fs FileSystem getUri
public abstract URI getUri();
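Before the examples, a minimal standalone sketch of the typical call: getUri() returns the scheme and authority that identify a FileSystem instance. The hdfs://namenode:8020 value mentioned in the comments is an illustrative assumption, not something configured by the examples below.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Picks up fs.defaultFS from core-site.xml, e.g. hdfs://namenode:8020 (illustrative value)
        FileSystem fs = FileSystem.get(conf);

        // getUri() returns the scheme and authority that identify this file system,
        // e.g. "hdfs://namenode:8020" for HDFS or "file:///" for the local file system
        URI uri = fs.getUri();
        System.out.println("scheme = " + uri.getScheme());
        System.out.println("authority = " + uri.getAuthority());
    }
}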
From source file:com.splunk.shuttl.archiver.filesystem.hadoop.HadoopArchiveFileSystemFactoryTest.java
License:Apache License
@Parameters(value = { "hadoop.host", "hadoop.port" })
public void create_givenPropertyFile_createsInstance(String hadoopHost, String hadoopPort) throws IOException {
    File hdfsProperties = createFile();
    TUtilsFile.writeKeyValueProperties(hdfsProperties, "hadoop.host = " + hadoopHost,
            "hadoop.port = " + hadoopPort);
    HadoopArchiveFileSystem hdfs = HadoopArchiveFileSystemFactory.createWithPropertyFile(hdfsProperties);
    FileSystem fs = hdfs.getFileSystem();
    assertEquals(hadoopHost, fs.getUri().getHost());
    assertEquals(hadoopPort, "" + fs.getUri().getPort());
}
From source file:com.srini.hadoopYarn.Client.java
License:Apache License
/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value."
                + ", specified=" + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework
    // can do basic sanity checks for the local resource
    // after it has been copied over to ensure it is the same
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));
    if (!shellCommand.isEmpty()) {
        vargs.add("--shell_command " + shellCommand + "");
    }
    if (!shellArgs.isEmpty()) {
        vargs.add("--shell_args " + shellArgs + "");
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file:com.talis.hadoop.rdf.solr.QuadsIndexer.java
License:Apache License
public int run(String[] args) throws Exception {

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem outputFs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        outputFs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(JOB_NAME);
    job.setJarByClass(getClass());

    int shards = -1;
    boolean compressOutput = false;

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    Path solrConfig = new Path(args[2]);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    if (shards > 0) {
        job.setNumReduceTasks(shards);
    }

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(QuadArrayWritable.class);

    job.setReducerClass(SolrReducer.class);
    SolrDocumentConverter.setSolrDocumentConverter(LiteralsIndexer.class, job.getConfiguration());

    job.setOutputFormatClass(SolrOutputFormat.class);

    String zipName = "solr.zip";
    FileSystem solrConfigFs = FileSystem.get(solrConfig.toUri(), configuration);
    final URI baseZipUrl = solrConfigFs.getUri().resolve(solrConfig.toString() + '#' + zipName);
    DistributedCache.addCacheArchive(baseZipUrl, job.getConfiguration());

    job.getConfiguration().set(SolrOutputFormat.SETUP_OK, solrConfig.toString());
    SolrOutputFormat.setOutputZipFormat(compressOutput, job.getConfiguration());

    if (LOG.isDebugEnabled())
        Utils.log(job, LOG);

    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java
License:Apache License
/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);

    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException(
                    "Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }

    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }

    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }

    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];

    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }

    HighWaterMarkInputFormat.setInputPaths(job, paths);

    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());
    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }
    return resolvedPaths;
}
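The makeQualified(fs.getUri(), fs.getWorkingDirectory()) call in the loop above is the standard Hadoop idiom for turning a possibly relative path into a fully qualified one. A minimal standalone sketch under assumed defaults (the path value and printed output are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyPathExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path relative = new Path("data/input.csv");   // illustrative relative path
        FileSystem fs = relative.getFileSystem(conf);
        // Qualify against the file system's URI (scheme + authority) and working directory,
        // e.g. file:/home/user/data/input.csv on the local file system
        Path qualified = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println(qualified);
    }
}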
From source file:com.toy.Client.java
License:Apache License
/**
 * Start a new Application Master and deploy the web application on 2 Tomcat containers
 *
 * @throws Exception
 */
void start() throws Exception {

    //Check tomcat dir
    final File tomcatHomeDir = new File(toyConfig.tomcat);
    final File tomcatLibraries = new File(tomcatHomeDir, "lib");
    final File tomcatBinaries = new File(tomcatHomeDir, "bin");
    Preconditions.checkState(tomcatLibraries.isDirectory(),
            tomcatLibraries.getAbsolutePath() + " does not exist");

    //Check war file
    final File warFile = new File(toyConfig.war);
    Preconditions.checkState(warFile.isFile(), warFile.getAbsolutePath() + " does not exist");

    yarn = YarnClient.createYarnClient();
    yarn.init(configuration);
    yarn.start();

    YarnClientApplication yarnApplication = yarn.createApplication();
    GetNewApplicationResponse newApplication = yarnApplication.getNewApplicationResponse();
    appId = newApplication.getApplicationId();
    ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
    appContext.setApplicationName("Tomcat : " + tomcatHomeDir.getName() + "\n War : " + warFile.getName());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Register required libraries
    Map<String, LocalResource> localResources = new HashMap<>();
    FileSystem fs = FileSystem.get(configuration);
    uploadDepAndRegister(localResources, appId, fs, "lib-ext/curator-client-2.3.0.jar");
    uploadDepAndRegister(localResources, appId, fs, "lib-ext/curator-framework-2.3.0.jar");
    uploadDepAndRegister(localResources, appId, fs, "lib-ext/curator-recipes-2.3.0.jar");

    // Register application master jar
    registerLocalResource(localResources, appId, fs, new Path(appMasterJar));

    // Register the WAR that will be deployed on Tomcat
    registerLocalResource(localResources, appId, fs, new Path(warFile.getAbsolutePath()));

    // Register Tomcat libraries
    for (File lib : tomcatLibraries.listFiles()) {
        registerLocalResource(localResources, appId, fs, new Path(lib.getAbsolutePath()));
    }

    File juli = new File(tomcatBinaries, "tomcat-juli.jar");
    if (juli.exists()) {
        registerLocalResource(localResources, appId, fs, new Path(juli.getAbsolutePath()));
    }

    amContainer.setLocalResources(localResources);

    // Setup master environment
    Map<String, String> env = new HashMap<>();
    final String TOMCAT_LIBS = fs.getHomeDirectory() + "/" + Constants.TOY_PREFIX + appId.toString();
    env.put(Constants.TOMCAT_LIBS, TOMCAT_LIBS);

    if (toyConfig.zookeeper != null) {
        env.put(Constants.ZOOKEEPER_QUORUM, toyConfig.zookeeper);
    } else {
        env.put(Constants.ZOOKEEPER_QUORUM, NetUtils.getHostname());
    }

    // 1. Compute classpath
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
            .append(File.pathSeparatorChar).append("./*");
    for (String c : configuration.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (configuration.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }
    env.put("CLASSPATH", classPathEnv.toString());
    env.put(Constants.WAR, warFile.getName());
    // For unit test with YarnMiniCluster
    env.put(YarnConfiguration.RM_SCHEDULER_ADDRESS, configuration.get(YarnConfiguration.RM_SCHEDULER_ADDRESS));
    amContainer.setEnvironment(env);

    // 1.2 Set constraint for the app master
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(32);
    appContext.setResource(capability);

    // 2. Compute app master cmd line
    Vector<CharSequence> vargs = new Vector<>(10);
    // Set java executable command
    vargs.add(ApplicationConstants.Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx32m");
    // Set class name
    vargs.add(TOYMaster.class.getCanonicalName());
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<>();
    commands.add(command.toString());
    amContainer.setCommands(commands);
    appContext.setAMContainerSpec(amContainer);

    // 3. Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = configuration.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new Exception("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final org.apache.hadoop.security.token.Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer,
                credentials);
        if (tokens != null) {
            for (org.apache.hadoop.security.token.Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setQueue("default");
    LOG.info("Submitting TOY application {} to ASM", appId.toString());
    yarn.submitApplication(appContext);

    // Monitor the application and exit if it is RUNNING
    monitorApplication(appId);
}
From source file:com.trendmicro.hdfs.webdav.test.MiniClusterTestUtil.java
License:Apache License
public MiniDFSCluster startMiniDFSCluster(int servers, final File dir, final String hosts[]) throws Exception {
    if (dir == null) {
        clusterTestBuildDir = setupClusterTestBuildDir();
    } else {
        clusterTestBuildDir = dir;
    }
    System.setProperty(TEST_DIRECTORY_KEY, clusterTestBuildDir.toString());
    System.setProperty("test.cache.data", clusterTestBuildDir.toString());
    Configuration conf = getConfiguration();
    dfsCluster = new MiniDFSCluster(0, conf, servers, true, true, true, null, null, hosts, null);
    FileSystem fs = dfsCluster.getFileSystem();
    conf.set("fs.defaultFS", fs.getUri().toString());
    conf.set("fs.default.name", fs.getUri().toString());
    return dfsCluster;
}
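Setting fs.defaultFS (and its deprecated alias fs.default.name) from fs.getUri() as done above is the usual way to point a Configuration at a particular file system. A minimal sketch, assuming a fresh Configuration and using the local file system purely for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class DefaultFsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);           // any FileSystem instance would do
        // Point the configuration at this file system so that later
        // FileSystem.get(conf) calls resolve to the same URI.
        conf.set("fs.defaultFS", fs.getUri().toString());
        conf.set("fs.default.name", fs.getUri().toString()); // deprecated alias, kept for older code
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
    }
}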
From source file:com.uber.hoodie.common.file.HoodieAppendLog.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param bufferSize buffer size for the underlying outputstream.
 * @param replication replication factor for the file.
 * @param blockSize block size for the file.
 * @param createParent create parent directory if non-existent
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
@Deprecated
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
        int bufferSize, short replication, long blockSize, boolean createParent,
        CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException {
    return createWriter(FileContext.getFileContext(fs.getUri(), conf), conf, name, keyClass, valClass,
            compressionType, codec, metadata, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
            CreateOpts.bufferSize(bufferSize),
            createParent ? CreateOpts.createParent() : CreateOpts.donotCreateParent(),
            CreateOpts.repFac(replication), CreateOpts.blockSize(blockSize));
}
From source file:com.uber.hoodie.common.io.storage.HoodieWrapperFileSystem.java
License:Apache License
public HoodieWrapperFileSystem(FileSystem fileSystem, ConsistencyGuard consistencyGuard) {
    this.fileSystem = fileSystem;
    this.uri = fileSystem.getUri();
    this.consistencyGuard = consistencyGuard;
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.LindenReducer.java
License:Apache License
@Override
protected void reduce(Shard key, Iterable<IntermediateForm> values, Context context)
        throws IOException, InterruptedException {
    logger.info("Construct a shard writer for " + key);
    FileSystem fs = FileSystem.get(conf);
    // debug:
    logger.info("filesystem is: " + fs.getUri());
    String temp = mapreduceuceTempDir + Path.SEPARATOR + "shard_" + key.toFlatString() + "_"
            + System.currentTimeMillis();
    logger.info("mapreduceuceTempDir is: " + mapreduceuceTempDir);
    final ShardWriter writer = new ShardWriter(fs, key, temp, conf);

    // update the shard
    Iterator<IntermediateForm> iterator = values.iterator();
    while (iterator.hasNext()) {
        IntermediateForm form = iterator.next();
        writer.process(form, facetsConfig);
    }

    // close the shard
    new Closeable() {
        volatile boolean closed = false;

        @Override
        public void close() throws IOException {
            // spawn a thread to give progress heartbeats
            Thread prog = new Thread() {
                @Override
                public void run() {
                    while (!closed) {
                        try {
                            Thread.sleep(1000);
                        } catch (InterruptedException e) {
                            continue;
                        } catch (Throwable e) {
                            return;
                        }
                    }
                }
            };

            try {
                prog.start();
                if (writer != null) {
                    writer.optimize(); // added this option to optimize after all the docs have been added;
                    writer.close();
                }
            } finally {
                closed = true;
            }
        }
    }.close();

    logger.info("Closed the shard writer for " + key + ", writer = " + writer);
    context.write(key, DONE);
}
From source file:com.yata.core.HDFSManager.java
License:Apache License
/**
 *
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws IOException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile)
        throws OozieClientException {

    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
            + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {
        hdfs = getHdfsFileSytem();

        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);
        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println(
                "copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
                + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    FileUtil hdfsUtil = new FileUtil();

    try {
        hdfsUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /**
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {
        hdfs.setPermission(hdfsTestDataTarget,
                new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Changing HDFS File Permissions - EXITING...");
    }
}