Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usage of org.apache.hadoop.fs.Path#toUri.

Prototype

public URI toUri() 

Source Link

Document

Convert this Path to a URI.

Usage

From source file:com.skp.experiment.fpm.pfpgrowth.PFPGrowth.java

License:Apache License

/**
 * Read the Frequent Patterns generated from Text
 * //from w w w  .j a  v a 2 s.co  m
 * @return List of TopK patterns for each string frequent feature
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException {

    Configuration conf = new Configuration();

    Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
    FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf);
    FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN));

    List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList();
    for (FileStatus fileStatus : outputFiles) {
        ret.addAll(org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth.readFrequentPattern(conf,
                fileStatus.getPath()));
    }
    return ret;
}

From source file:com.sogou.dockeronyarn.client.DockerClient.java

License:Apache License

/**
 * Main run function for the client//  w w  w. j a  v  a 2s .com
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public ApplicationId run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request 
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max. 
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max. 
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    //appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources         
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem 
    // Create a local resource to point to the destination jar path 
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed 
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed. 
    // To do this, we need to first copy into the filesystem that is visible 
    // to the yarn framework. 
    // We do not need to set this as a local resource for the application 
    // master as the application master does not need it.       
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    //if (!shellScriptPath.isEmpty()) {
    // Path shellSrc = new Path(fs.getHomeDirectory(), SCRIPT_PATH);
    String shellPathSuffix = SCRIPT_PATH;
    Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
    //fs.copyFromLocalFile(false, true, shellSrc, shellDst);
    hdfsShellScriptLocation = shellDst.toUri().toString();
    FileStatus shellFileStatus = fs.getFileStatus(shellDst);
    hdfsShellScriptLen = shellFileStatus.getLen();
    hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    //}

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the 
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath       
    // At some point we should not be required to add 
    // the hadoop specific classpaths to the env. 
    // It should be provided out of the box. 
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");

    //    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
    //      .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");

    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    //    for (String c : conf.getStrings(
    //        YarnConfiguration.YARN_APPLICATION_CLASSPATH,
    //        YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
    //      classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
    //      classPathEnv.append(c.trim());
    //    }
    //    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append(
    //      "./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master 
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command 
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    //vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name 
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));
    vargs.add("--container_retry " + String.valueOf(this.container_retry));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final commmand
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and 
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide? 
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success 
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return appId;

}

From source file:com.splicemachine.fs.localfs.SpliceFileSystem.java

License:Apache License

@Override
public void access(Path path, FsAction mode) throws AccessControlException, FileNotFoundException, IOException {
    if (LOG.isTraceEnabled())
        LOG.trace(String.format("access path=%s, mode=%s", path, mode));
    fs.access(new Path(path.toUri().getRawPath()), mode);
}

From source file:com.splicemachine.orc.input.OrcMapreduceRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE, MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE, MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE_DEFAULT);
    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    rowStruct = getRowStruct(configuration);
    predicate = getSplicePredicate(configuration);
    List<Integer> partitions = getPartitionIds(configuration);
    List<Integer> columnIds = getColumnIds(configuration);

    List<String> values = null;
    try {/* w  w w . jav  a 2s.c  o m*/
        values = Warehouse.getPartValuesFromPartName(((OrcNewSplit) inputSplit).getPath().toString());
    } catch (MetaException me) {
        throw new IOException(me);
    }
    OrcDataSource orcDataSource = new HdfsOrcDataSource(path.toString(), size,
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
            new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE), inputStream);
    OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(),
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
    orcRecordReader = orcReader.createRecordReader(getColumnsAndTypes(columnIds, rowStruct), predicate,
            HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext(), partitions, values);
}

From source file:com.splicemachine.storage.HNIOFileSystem.java

License:Apache License

private org.apache.hadoop.fs.Path toHPath(Path path) {
    return new org.apache.hadoop.fs.Path(path.toUri());
}

From source file:com.splout.db.benchmark.TablespaceAnalyserCMD.java

License:Apache License

protected JSONTablespaceDefinition loadTablespaceFile(String tablespaceFile)
        throws IOException, JSONSerDe.JSONSerDeException {
    Path file = new Path(tablespaceFile);
    FileSystem fS = FileSystem.get(file.toUri(), getConf());

    if (!fS.exists(file)) {
        throw new IllegalArgumentException("Config input file: " + file + " doesn't exist!");
    }/* w  ww . ja  v  a  2  s  .  co m*/

    String strContents = HadoopUtils.fileToString(fS, file);
    JSONTablespaceDefinition def = JSONSerDe.deSer(strContents, JSONTablespaceDefinition.class);
    return def;
}

From source file:com.splout.db.hadoop.GeneratorCMD.java

License:Apache License

public int run(String[] args) throws Exception {
    JCommander jComm = new JCommander(this);
    jComm.setProgramName(//from ww w . j  a  va 2s .c  o  m
            "Splout Tablespaces Generator. Generates tablespaces, ready to be deployed to a Splout Cluster.");
    try {
        jComm.parse(args);
    } catch (Throwable t) {
        t.printStackTrace();
        jComm.usage();
        return -1;
    }

    if (parallelism < 1) {
        System.err.println("Parallelism must be greater than 0.");
        System.exit(1);
    }

    log.info("Parsing input parameters...");

    // All the tablespaces that will be generated and deployed atomically, hashed by their name
    // We generate this first so we can detect errors in the configuration before even using Hadoop
    Map<String, TablespaceSpec> tablespacesToGenerate = new HashMap<String, TablespaceSpec>();

    // Partition maps to reuse at indexation. Used when sampling is skipped.
    final Map<String, PartitionMap> partitionMapsToReuse = new HashMap<String, PartitionMap>();

    for (String tablespaceFile : tablespaceFiles) {
        Path file = new Path(tablespaceFile);
        FileSystem fS = FileSystem.get(file.toUri(), getConf());

        if (!fS.exists(file)) {
            throw new IllegalArgumentException("Config input file: " + file + " doesn't exist!");
        }

        String strContents = HadoopUtils.fileToString(fS, file);
        JSONTablespaceDefinition def = JSONSerDe.deSer(strContents, JSONTablespaceDefinition.class);
        TablespaceSpec spec = def.build(conf);
        String name = def.getName();

        tablespacesToGenerate.put(name, spec);

        // Reusing partition maps?
        if (qnodeURL != null) {
            partitionMapsToReuse.put(name, retrievePartitionMapfromQNode(name));
        }
    }

    if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
        File nativeLibs = new File("native");
        if (nativeLibs.exists()) {
            SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
        }
    }

    Path out = new Path(output);
    FileSystem outFs = out.getFileSystem(getConf());
    HadoopUtils.deleteIfExists(outFs, out);

    ExecutorService executor = Executors.newFixedThreadPool(parallelism);
    ExecutorCompletionService<Boolean> ecs = new ExecutorCompletionService<Boolean>(executor);
    ArrayList<Future<Boolean>> generatorFutures = new ArrayList<Future<Boolean>>();

    // Generate each tablespace
    for (final Map.Entry<String, TablespaceSpec> tablespace : tablespacesToGenerate.entrySet()) {
        Path tablespaceOut = new Path(out, tablespace.getKey());
        TablespaceSpec spec = tablespace.getValue();

        log.info("Generating view with Hadoop (" + tablespace.getKey() + ")");
        final TablespaceGenerator viewGenerator = new TablespaceGenerator(spec, tablespaceOut, this.getClass());

        generatorFutures.add(ecs.submit(new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
                if (qnodeURL == null) {
                    viewGenerator.generateView(conf, samplingType, new TupleSampler.RandomSamplingOptions());
                    return true;
                } else {
                    viewGenerator.generateView(conf, partitionMapsToReuse.get(tablespace.getKey()));
                    return true;
                }
            }
        }));
    }

    // Waiting all tasks to finish.
    for (int i = 0; i < tablespacesToGenerate.size(); i++) {
        // Get will throw an exception if the callable returned it.
        try {
            ecs.take().get();
        } catch (ExecutionException e) {
            // One job was wrong. Stopping the rest.
            for (Future<Boolean> task : generatorFutures) {
                task.cancel(true);
            }
            executor.shutdown();
            throw e;
        }
    }

    executor.shutdown();

    log.info("Done!");
    return 0;
}

From source file:com.splout.db.hadoop.TupleSampler.java

License:Apache License

/**
 * Random sampling method a-la-TeraSort, getting some consecutive samples from each InputSplit
 * without using a Job.//from  w  ww. j  a va 2s  . c  om
 * The output is SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {

        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }

        ;
    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                //
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }

    }

    writer.close();
    return records;
}

From source file:com.splout.db.integration.RetailDemo.java

License:Apache License

public void generate(long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath)
        throws Exception {
    Configuration conf = new Configuration();

    FileSystem fS = FileSystem.get(conf);
    HadoopUtils.deleteIfExists(fS, inputPath);
    HadoopUtils.deleteIfExists(fS, outputPath);

    NullWritable nullValue = NullWritable.get();
    Schema retailSchema = new Schema("retail", Fields
            .parse("tienda:string, cliente:int, ticket:double, producto:int, precio:double, fecha:string"));
    ITuple tuple = new Tuple(retailSchema);

    TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, retailSchema);

    // Writes nRegs Tuples to HDFS
    long soFar = 0;

    while (soFar < nRegs) {
        int tienda = (int) (Math.random() * N_TIENDAS);
        int cliente = (int) (Math.random() * N_CLIENTES);

        tuple.set("tienda", "T" + tienda);
        tuple.set("cliente", cliente);
        double[] precios = new double[N_PRODUCTOS_PER_TICKET];
        double ticket = 0;
        for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) {
            precios[i] = ((int) (Math.random() * MAX_PRECIO * 100)) / 100;
            precios[i] = Math.max(precios[i], 5.00);
            ticket += precios[i];//from ww w .  ja va  2 s .c  o m
        }
        tuple.set("ticket", ticket);
        long fecha = System.currentTimeMillis() - ((long) (Math.random() * DAY_SPAN * 24 * 60 * 60 * 1000));
        tuple.set("fecha", fmt.print(fecha));
        for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) {
            int producto = (int) (Math.random() * N_PRODUCTOS);
            tuple.set("precio", precios[i]);
            tuple.set("producto", producto);
            writer.append(tuple);
            soFar++;
        }
    }
    writer.close();

    // Generate Splout view (cliente)
    String[] dnodeArray = dnodes.split(",");
    TablespaceSpec tablespace = TablespaceSpec.of(retailSchema, "cliente", inputPath, new TupleInputFormat(),
            dnodeArray.length);
    TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    PartitionMap partitionMap = generateView.getPartitionMap();
    ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodeArray);

    Path deployUri = new Path(outputPath, "store").makeQualified(fS);

    SploutClient client = new SploutClient(qnode);
    client.deploy("retailcliente", partitionMap, replicationMap, deployUri.toUri());

    // Generate Splout view (tienda)
    Path output2 = new Path(outputPath + "-2");
    HadoopUtils.deleteIfExists(fS, output2);
    tablespace = TablespaceSpec.of(retailSchema, "tienda", inputPath, new TupleInputFormat(),
            dnodeArray.length);
    generateView = new TablespaceGenerator(tablespace, output2);

    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    partitionMap = generateView.getPartitionMap();
    deployUri = new Path(output2, "store").makeQualified(fS);
    client.deploy("retailtienda", partitionMap, replicationMap, deployUri.toUri());
}

From source file:com.splout.db.integration.TestDemo.java

License:Apache License

public void generate(int nPartitions, long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath)
        throws Exception {
    Configuration conf = new Configuration();

    FileSystem fS = FileSystem.get(conf);
    HadoopUtils.deleteIfExists(fS, inputPath);
    HadoopUtils.deleteIfExists(fS, outputPath);

    NullWritable nullValue = NullWritable.get();
    TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, SploutHadoopTestUtils.SCHEMA);

    // Writes nRegs Tuples to HDFS
    long soFar = 0;
    while (soFar < nRegs) {
        writer.append(SploutHadoopTestUtils.getTuple("id" + soFar, (int) soFar));
        soFar++;/*from  w ww  .ja v  a2 s .c o  m*/
    }
    writer.close();

    // Generate Splout view
    TablespaceSpec tablespace = TablespaceSpec.of(SploutHadoopTestUtils.SCHEMA, "id", inputPath,
            new TupleInputFormat(), nPartitions);
    TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    PartitionMap partitionMap = generateView.getPartitionMap();
    ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodes.split(","));

    Path deployUri = new Path(outputPath, "store").makeQualified(fS);

    SploutClient client = new SploutClient(qnode);
    client.deploy("tablespace1", partitionMap, replicationMap, deployUri.toUri());
}