List of usage examples for org.apache.hadoop.fs Path toUri
public URI toUri()
From source file:com.skp.experiment.fpm.pfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Reads the frequent patterns that were generated as text under the job output directory.
 *
 * <p>Every file below {@code <OUTPUT>/<FREQUENT_PATTERNS>} that matches {@code FILE_PATTERN} is
 * read with Mahout's {@code FPGrowth.readFrequentPattern} and the results are concatenated in
 * the order the file system reports the files.
 *
 * @param params job parameters; the {@code OUTPUT} entry names the base output directory
 * @return list of TopK patterns for each string frequent feature; empty when no file matches
 * @throws IOException if the file system cannot be reached or a pattern file cannot be read
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException {
    Configuration conf = new Configuration();
    Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
    // Resolve the file system from the path's own URI so that non-default schemes are honoured.
    FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf);
    FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN));

    List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList();
    if (outputFiles == null) {
        // globStatus may answer null (rather than an empty array) when nothing exists at the path.
        return ret;
    }
    for (FileStatus fileStatus : outputFiles) {
        ret.addAll(org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth.readFrequentPattern(conf,
                fileStatus.getPath()));
    }
    return ret;
}
From source file:com.sogou.dockeronyarn.client.DockerClient.java
License:Apache License
/** * Main run function for the client// w w w. j a v a 2s .com * @return true if application completed successfully * @throws IOException * @throws YarnException */ public ApplicationId run() throws IOException, YarnException { LOG.info("Running Client"); yarnClient.start(); YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING); LOG.info("Got Cluster node info from ASM"); for (NodeReport node : clusterNodeReports) { LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress" + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers" + node.getNumContainers()); } QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue); LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); // TODO get min/max resource capabilities from RM and change memory ask if needed // If we do not have min/max, we may not be able to correctly request // the required resources from the RM for the app master // Memory ask has to be a multiple of min and less than max. 
// Dump out information about cluster capability as seen by the resource manager int maxMem = appResponse.getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); // A resource ask cannot exceed the max. if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores(); LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores); if (amVCores > maxVCores) { LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value." + ", specified=" + amVCores + ", max=" + maxVCores); amVCores = maxVCores; } // set the application name ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); //appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); LOG.info("Copy App Master jar from local filesystem and add to local environment"); // Copy the application master jar to the filesystem // Create a local resource to point to the destination jar path FileSystem fs = FileSystem.get(conf); addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null); // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null); } // The shell script has to be made available on the final container(s) // where it will be executed. 
// To do this, we need to first copy into the filesystem that is visible // to the yarn framework. // We do not need to set this as a local resource for the application // master as the application master does not need it. String hdfsShellScriptLocation = ""; long hdfsShellScriptLen = 0; long hdfsShellScriptTimestamp = 0; //if (!shellScriptPath.isEmpty()) { // Path shellSrc = new Path(fs.getHomeDirectory(), SCRIPT_PATH); String shellPathSuffix = SCRIPT_PATH; Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); //fs.copyFromLocalFile(false, true, shellSrc, shellDst); hdfsShellScriptLocation = shellDst.toUri().toString(); FileStatus shellFileStatus = fs.getFileStatus(shellDst); hdfsShellScriptLen = shellFileStatus.getLen(); hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); //} if (shellArgs.length > 0) { addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources, StringUtils.join(shellArgs, " ")); } // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // put location of shell script into env // using the env info, the application master will create the correct local resource for the // eventual containers that will be launched to execute the shell scripts env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); // Add AppMaster.jar location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." 
for the application jar StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar) .append("./*"); // StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$()) // .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // for (String c : conf.getStrings( // YarnConfiguration.YARN_APPLICATION_CLASSPATH, // YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) { // classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR); // classPathEnv.append(c.trim()); // } // classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append( // "./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } env.put("CLASSPATH", classPathEnv.toString()); // Set the necessary command to execute the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); //vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name vargs.add(appMasterMainClass); // Set params for Application Master vargs.add("--container_memory " + String.valueOf(containerMemory)); vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); vargs.add("--num_containers " + String.valueOf(numContainers)); vargs.add("--priority " + String.valueOf(shellCmdPriority)); vargs.add("--container_retry " + 
String.valueOf(this.container_retry)); for (Map.Entry<String, String> entry : shellEnv.entrySet()) { vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); } if (debugFlag) { vargs.add("--debug"); } vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); // Set up the container launch context for the application master ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null, null, null); // Set up resource type requirements // For now, both memory and vcores are supported, so we set memory and // vcores requirements Resource capability = Resource.newInstance(amMemory, amVCores); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); // Setup security tokens if (UserGroupInformation.isSecurityEnabled()) { // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. 
final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } appContext.setAMContainerSpec(amContainer); // Set the priority for the application master // TODO - what is the range for priority? how to decide? Priority pri = Priority.newInstance(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(amQueue); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); yarnClient.submitApplication(appContext); // TODO // Try submitting the same request again // app submission failure? // Monitor the application return appId; }
From source file:com.splicemachine.fs.localfs.SpliceFileSystem.java
License:Apache License
/**
 * Delegates the access check to the wrapped file system.
 *
 * <p>Only the raw path component of {@code path}'s URI is forwarded; scheme and authority are
 * not passed on.
 *
 * @param path path to check
 * @param mode requested access mode
 */
@Override
public void access(Path path, FsAction mode) throws AccessControlException, FileNotFoundException, IOException {
    if (LOG.isTraceEnabled()) {
        LOG.trace(String.format("access path=%s, mode=%s", path, mode));
    }
    // NOTE(review): getRawPath keeps percent-escapes; confirm this is intended for paths that
    // contain characters which the URI form encodes.
    final String rawPath = path.toUri().getRawPath();
    final Path delegatePath = new Path(rawPath);
    fs.access(delegatePath, mode);
}
From source file:com.splicemachine.orc.input.OrcMapreduceRecordReader.java
License:Open Source License
/**
 * Opens the ORC file behind the split and creates the record reader over it.
 *
 * <p>Reader tuning values (merge distance, read size, stream buffer size, all in megabytes) are
 * read from the task configuration. The partition values are derived from the split's path.
 * If anything fails after the file has been opened, the input stream is closed before the
 * failure is propagated.
 *
 * @param inputSplit split to read; must be an {@code OrcNewSplit}
 * @param taskAttemptContext context supplying the configuration
 * @throws IOException if the file cannot be opened or read, or the partition values cannot be
 *         parsed from the path (wrapping the {@code MetaException})
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE, MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE, MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE_DEFAULT);

    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);

    boolean initialized = false;
    try {
        rowStruct = getRowStruct(configuration);
        predicate = getSplicePredicate(configuration);
        List<Integer> partitions = getPartitionIds(configuration);
        List<Integer> columnIds = getColumnIds(configuration);

        List<String> values;
        try {
            values = Warehouse.getPartValuesFromPartName(path.toString());
        } catch (MetaException me) {
            throw new IOException(me);
        }

        OrcDataSource orcDataSource = new HdfsOrcDataSource(path.toString(), size,
                new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
                new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
                new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE), inputStream);
        OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(),
                new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
                new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
        orcRecordReader = orcReader.createRecordReader(getColumnsAndTypes(columnIds, rowStruct), predicate,
                HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext(), partitions, values);
        initialized = true;
    } finally {
        if (!initialized) {
            // Do not leak the open stream when set-up fails part-way through.
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // The failure that brought us here is the one worth reporting.
            }
        }
    }
}
From source file:com.splicemachine.storage.HNIOFileSystem.java
License:Apache License
/**
 * Converts the given path into a Hadoop {@code Path} by way of its URI.
 *
 * @param path path to convert
 * @return a Hadoop path built from {@code path.toUri()}
 */
private org.apache.hadoop.fs.Path toHPath(Path path) {
    final java.net.URI location = path.toUri();
    return new org.apache.hadoop.fs.Path(location);
}
From source file:com.splout.db.benchmark.TablespaceAnalyserCMD.java
License:Apache License
/**
 * Loads a tablespace definition from a JSON file.
 *
 * <p>The file system is resolved from the file's own URI, the file's content is read as a
 * string and deserialized into a {@code JSONTablespaceDefinition}.
 *
 * @param tablespaceFile location of the JSON definition file
 * @return the deserialized definition
 * @throws IllegalArgumentException if the file does not exist
 */
protected JSONTablespaceDefinition loadTablespaceFile(String tablespaceFile)
        throws IOException, JSONSerDe.JSONSerDeException {
    final Path definitionPath = new Path(tablespaceFile);
    final FileSystem fileSystem = FileSystem.get(definitionPath.toUri(), getConf());
    if (fileSystem.exists(definitionPath)) {
        final String json = HadoopUtils.fileToString(fileSystem, definitionPath);
        return JSONSerDe.deSer(json, JSONTablespaceDefinition.class);
    }
    throw new IllegalArgumentException("Config input file: " + definitionPath + " doesn't exist!");
}
From source file:com.splout.db.hadoop.GeneratorCMD.java
License:Apache License
public int run(String[] args) throws Exception { JCommander jComm = new JCommander(this); jComm.setProgramName(//from ww w . j a va 2s .c o m "Splout Tablespaces Generator. Generates tablespaces, ready to be deployed to a Splout Cluster."); try { jComm.parse(args); } catch (Throwable t) { t.printStackTrace(); jComm.usage(); return -1; } if (parallelism < 1) { System.err.println("Parallelism must be greater than 0."); System.exit(1); } log.info("Parsing input parameters..."); // All the tablespaces that will be generated and deployed atomically, hashed by their name // We generate this first so we can detect errors in the configuration before even using Hadoop Map<String, TablespaceSpec> tablespacesToGenerate = new HashMap<String, TablespaceSpec>(); // Partition maps to reuse at indexation. Used when sampling is skipped. final Map<String, PartitionMap> partitionMapsToReuse = new HashMap<String, PartitionMap>(); for (String tablespaceFile : tablespaceFiles) { Path file = new Path(tablespaceFile); FileSystem fS = FileSystem.get(file.toUri(), getConf()); if (!fS.exists(file)) { throw new IllegalArgumentException("Config input file: " + file + " doesn't exist!"); } String strContents = HadoopUtils.fileToString(fS, file); JSONTablespaceDefinition def = JSONSerDe.deSer(strContents, JSONTablespaceDefinition.class); TablespaceSpec spec = def.build(conf); String name = def.getName(); tablespacesToGenerate.put(name, spec); // Reusing partition maps? 
if (qnodeURL != null) { partitionMapsToReuse.put(name, retrievePartitionMapfromQNode(name)); } } if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) { File nativeLibs = new File("native"); if (nativeLibs.exists()) { SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf); } } Path out = new Path(output); FileSystem outFs = out.getFileSystem(getConf()); HadoopUtils.deleteIfExists(outFs, out); ExecutorService executor = Executors.newFixedThreadPool(parallelism); ExecutorCompletionService<Boolean> ecs = new ExecutorCompletionService<Boolean>(executor); ArrayList<Future<Boolean>> generatorFutures = new ArrayList<Future<Boolean>>(); // Generate each tablespace for (final Map.Entry<String, TablespaceSpec> tablespace : tablespacesToGenerate.entrySet()) { Path tablespaceOut = new Path(out, tablespace.getKey()); TablespaceSpec spec = tablespace.getValue(); log.info("Generating view with Hadoop (" + tablespace.getKey() + ")"); final TablespaceGenerator viewGenerator = new TablespaceGenerator(spec, tablespaceOut, this.getClass()); generatorFutures.add(ecs.submit(new Callable<Boolean>() { @Override public Boolean call() throws Exception { if (qnodeURL == null) { viewGenerator.generateView(conf, samplingType, new TupleSampler.RandomSamplingOptions()); return true; } else { viewGenerator.generateView(conf, partitionMapsToReuse.get(tablespace.getKey())); return true; } } })); } // Waiting all tasks to finish. for (int i = 0; i < tablespacesToGenerate.size(); i++) { // Get will throw an exception if the callable returned it. try { ecs.take().get(); } catch (ExecutionException e) { // One job was wrong. Stopping the rest. for (Future<Boolean> task : generatorFutures) { task.cancel(true); } executor.shutdown(); throw e; } } executor.shutdown(); log.info("Done!"); return 0; }
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
/** * Random sampling method a-la-TeraSort, getting some consecutive samples from each InputSplit * without using a Job.//from w ww. j a va 2s . c om * The output is SequenceFile with keys. * * @return The number of retrieved samples */ private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits, Map<InputSplit, TableSpec> splitToTableSpec, Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat, Map<InputSplit, Map<String, String>> specificHadoopConf, Map<InputSplit, RecordProcessor> recordProcessorPerSplit, Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException { // Instantiate the writer we will write samples to FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf); if (splits.size() == 0) { throw new IllegalArgumentException("There are no splits to sample from!"); } @SuppressWarnings("deprecation") SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class, NullWritable.class); logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: " + sampleSize + ", total number of splits: " + splits.size()); int blocks = Math.min(maxSplitsToVisit, splits.size()); blocks = Math.min((int) sampleSize, blocks); long recordsPerSample = sampleSize / blocks; int sampleStep = splits.size() / blocks; long records = 0; CounterInterface counterInterface = new CounterInterface(null) { public Counter getCounter(String group, String name) { return Mockito.mock(Counter.class); } ; }; // Take N samples from different parts of the input for (int i = 0; i < blocks; ++i) { TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1); TaskAttemptContext attemptContext = null; try { attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId); } catch (Exception e) { throw new RuntimeException(e); } InputSplit split = splits.get(sampleStep * i); if (specificHadoopConf.get(split) != null) { for 
(Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) { attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue()); } } logger.info("Sampling split: " + split); RecordReader<ITuple, NullWritable> reader = null; try { reader = splitToFormat.get(split).createRecordReader(split, attemptContext); reader.initialize(split, attemptContext); RecordProcessor processor = recordProcessorPerSplit.get(split); Text key = new Text(); while (reader.nextKeyValue()) { // ITuple tuple = reader.getCurrentKey(); ITuple uTuple; try { uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface); } catch (Throwable e) { throw new RuntimeException(e); } if (uTuple != null) { // user may have filtered the record try { key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split), splitToJsEngine.get(split))); } catch (Throwable e) { throw new RuntimeException("Error when determining partition key.", e); } writer.append(key, NullWritable.get()); records += 1; if ((i + 1) * recordsPerSample <= records) { break; } } } } catch (InterruptedException e) { throw new RuntimeException(e); } } writer.close(); return records; }
From source file:com.splout.db.integration.RetailDemo.java
License:Apache License
public void generate(long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath) throws Exception { Configuration conf = new Configuration(); FileSystem fS = FileSystem.get(conf); HadoopUtils.deleteIfExists(fS, inputPath); HadoopUtils.deleteIfExists(fS, outputPath); NullWritable nullValue = NullWritable.get(); Schema retailSchema = new Schema("retail", Fields .parse("tienda:string, cliente:int, ticket:double, producto:int, precio:double, fecha:string")); ITuple tuple = new Tuple(retailSchema); TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, retailSchema); // Writes nRegs Tuples to HDFS long soFar = 0; while (soFar < nRegs) { int tienda = (int) (Math.random() * N_TIENDAS); int cliente = (int) (Math.random() * N_CLIENTES); tuple.set("tienda", "T" + tienda); tuple.set("cliente", cliente); double[] precios = new double[N_PRODUCTOS_PER_TICKET]; double ticket = 0; for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) { precios[i] = ((int) (Math.random() * MAX_PRECIO * 100)) / 100; precios[i] = Math.max(precios[i], 5.00); ticket += precios[i];//from ww w . 
ja va 2 s .c o m } tuple.set("ticket", ticket); long fecha = System.currentTimeMillis() - ((long) (Math.random() * DAY_SPAN * 24 * 60 * 60 * 1000)); tuple.set("fecha", fmt.print(fecha)); for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) { int producto = (int) (Math.random() * N_PRODUCTOS); tuple.set("precio", precios[i]); tuple.set("producto", producto); writer.append(tuple); soFar++; } } writer.close(); // Generate Splout view (cliente) String[] dnodeArray = dnodes.split(","); TablespaceSpec tablespace = TablespaceSpec.of(retailSchema, "cliente", inputPath, new TupleInputFormat(), dnodeArray.length); TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath); generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions()); PartitionMap partitionMap = generateView.getPartitionMap(); ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodeArray); Path deployUri = new Path(outputPath, "store").makeQualified(fS); SploutClient client = new SploutClient(qnode); client.deploy("retailcliente", partitionMap, replicationMap, deployUri.toUri()); // Generate Splout view (tienda) Path output2 = new Path(outputPath + "-2"); HadoopUtils.deleteIfExists(fS, output2); tablespace = TablespaceSpec.of(retailSchema, "tienda", inputPath, new TupleInputFormat(), dnodeArray.length); generateView = new TablespaceGenerator(tablespace, output2); generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions()); partitionMap = generateView.getPartitionMap(); deployUri = new Path(output2, "store").makeQualified(fS); client.deploy("retailtienda", partitionMap, replicationMap, deployUri.toUri()); }
From source file:com.splout.db.integration.TestDemo.java
License:Apache License
public void generate(int nPartitions, long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath) throws Exception { Configuration conf = new Configuration(); FileSystem fS = FileSystem.get(conf); HadoopUtils.deleteIfExists(fS, inputPath); HadoopUtils.deleteIfExists(fS, outputPath); NullWritable nullValue = NullWritable.get(); TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, SploutHadoopTestUtils.SCHEMA); // Writes nRegs Tuples to HDFS long soFar = 0; while (soFar < nRegs) { writer.append(SploutHadoopTestUtils.getTuple("id" + soFar, (int) soFar)); soFar++;/*from w ww .ja v a2 s .c o m*/ } writer.close(); // Generate Splout view TablespaceSpec tablespace = TablespaceSpec.of(SploutHadoopTestUtils.SCHEMA, "id", inputPath, new TupleInputFormat(), nPartitions); TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath); generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions()); PartitionMap partitionMap = generateView.getPartitionMap(); ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodes.split(",")); Path deployUri = new Path(outputPath, "store").makeQualified(fS); SploutClient client = new SploutClient(qnode); client.deploy("tablespace1", partitionMap, replicationMap, deployUri.toUri()); }