Example usage for org.apache.hadoop.conf Configuration getBoolean

List of usage examples for org.apache.hadoop.conf Configuration getBoolean

Introduction

This page collects example usages of org.apache.hadoop.conf Configuration getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
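
A minimal, self-contained sketch of that contract (the property name and class below are made up for illustration and do not come from any of the examples that follow):

import org.apache.hadoop.conf.Configuration;

public class GetBooleanSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Key is not set, so the supplied default is returned.
        boolean missing = conf.getBoolean("example.feature.enabled", true); // true

        // Explicitly set values are parsed as booleans.
        conf.setBoolean("example.feature.enabled", false);
        boolean disabled = conf.getBoolean("example.feature.enabled", true); // false

        // A value that is not a valid boolean falls back to the default.
        conf.set("example.feature.enabled", "not-a-boolean");
        boolean fallback = conf.getBoolean("example.feature.enabled", true); // true

        System.out.println(missing + " " + disabled + " " + fallback);
    }
}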

Usage

From source file:ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }

            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}
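
The NO_STRICT_COL_FAMILY flag read above via conf.getBoolean(NO_STRICT_COL_FAMILY, false) is normally switched on from the command line with a -D option, as the error message in the example suggests. A hedged sketch of how such a flag reaches the Configuration, assuming "no.strict" as the key (the real key string is whatever ImportTsv defines for NO_STRICT_COL_FAMILY):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class NoStrictFlagSketch {
    public static void main(String[] rawArgs) throws Exception {
        Configuration conf = new Configuration();

        // GenericOptionsParser copies -Dkey=value arguments into the Configuration,
        // e.g. an invocation like: -Dno.strict=true <table> <inputdir>
        String[] args = new GenericOptionsParser(conf, rawArgs).getRemainingArgs();

        // The same lookup the example performs: strict column family checking by default.
        boolean noStrict = conf.getBoolean("no.strict", false);
        System.out.println("column family check disabled: " + noStrict
                + ", remaining args: " + args.length);
    }
}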

From source file:ParascaleFileSystem.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public void initialize(final URI uri, final Configuration conf) throws IOException {
    final URI rawUri;
    final RawParascaleFileSystem rawParascaleFileSystem;
    UserGroupInformation groupInformation;
    try {
        if (conf.get("hadoop.job.ugi") != null) {
            String username = new StringTokenizer(conf.get("hadoop.job.ugi"), ",").nextToken();
            groupInformation = UserGroupInformation.createRemoteUser(username);
        } else {
            groupInformation = UserGroupInformation.getCurrentUser();
        }
        rawParascaleFileSystem = new RawParascaleFileSystem(groupInformation);
        fs = conf.getBoolean(CRC_FILESYSTEM, false) ? new ChecksumFsWrapper(rawParascaleFileSystem)
                : rawParascaleFileSystem;
        rawUri = new URI(uri.getScheme(), uri.getAuthority(), null, null, null);
    } catch (final URISyntaxException e) {
        throw (IOException) new IOException().initCause(e);
    }
    // initialize with the raw URI - RawFS expects it without a path!
    fs.initialize(rawUri, conf);
    if (!rawParascaleFileSystem.isMountPointAbsolute()) {
        throw new IOException(
                "Mountpoint " + rawParascaleFileSystem.getMountPoint() + " is not an absolute path");
    }
    if (!rawParascaleFileSystem.mountPointExists()) {
        throw new IOException("WorkingDirectory does not exist - can not mount Parascale " + "filesystem at "
                + rawParascaleFileSystem.getMountPath());
    }
    if (!rawParascaleFileSystem.createHomeDirectory()) {
        throw new IOException("Can not create HomeDirectory");
    }

}

From source file:ai.grakn.graph.internal.computer.GraknSparkComputer.java

License:Open Source License

private static void loadJars(final JavaSparkContext sparkContext, final Configuration hadoopConfiguration) {
    if (hadoopConfiguration.getBoolean(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, true)) {
        final String hadoopGremlinLocalLibs = null == System.getProperty(Constants.HADOOP_GREMLIN_LIBS)
                ? System.getenv(Constants.HADOOP_GREMLIN_LIBS)
                : System.getProperty(Constants.HADOOP_GREMLIN_LIBS);
        if (null == hadoopGremlinLocalLibs) {
            LOGGER.warn(Constants.HADOOP_GREMLIN_LIBS + " is not set -- proceeding regardless");
        } else {
            final String[] paths = hadoopGremlinLocalLibs.split(":");
            for (final String path : paths) {
                final File file = new File(path);
                if (file.exists()) {
                    Stream.of(file.listFiles()).filter(f -> f.getName().endsWith(Constants.DOT_JAR))
                            .forEach(f -> sparkContext.addJar(f.getAbsolutePath()));
                } else {
                    LOGGER.warn(path + " does not reference a valid directory -- proceeding regardless");
                }
            }
        }
    }
}

From source file:ai.grakn.kb.internal.computer.GraknSparkComputer.java

License:Open Source License

@SuppressWarnings("PMD.UnusedFormalParameter")
private Future<ComputerResult> submitWithExecutor() {
    jobGroupId = Integer.toString(ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE));
    String jobDescription = this.vertexProgram == null ? this.mapReducers.toString()
            : this.vertexProgram + "+" + this.mapReducers;

    // Use different output locations
    this.sparkConfiguration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION,
            this.sparkConfiguration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION) + "/" + jobGroupId);

    updateConfigKeys(sparkConfiguration);

    final Future<ComputerResult> result = computerService.submit(() -> {
        final long startTime = System.currentTimeMillis();

        // apache and hadoop configurations that are used throughout the graph computer computation
        final org.apache.commons.configuration.Configuration graphComputerConfiguration = new HadoopConfiguration(
                this.sparkConfiguration);
        if (!graphComputerConfiguration.containsKey(Constants.SPARK_SERIALIZER)) {
            graphComputerConfiguration.setProperty(Constants.SPARK_SERIALIZER,
                    GryoSerializer.class.getCanonicalName());
        }
        graphComputerConfiguration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER_HAS_EDGES,
                this.persist.equals(GraphComputer.Persist.EDGES));

        final Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(graphComputerConfiguration);

        final Storage fileSystemStorage = FileSystemStorage.open(hadoopConfiguration);
        final boolean inputFromHDFS = FileInputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean inputFromSpark = PersistedInputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class));
        final boolean outputToHDFS = FileOutputFormat.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean outputToSpark = PersistedOutputRDD.class.isAssignableFrom(
                hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class));
        final boolean skipPartitioner = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false);
        final boolean skipPersist = graphComputerConfiguration
                .getBoolean(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false);

        if (inputFromHDFS) {
            String inputLocation = Constants
                    .getSearchGraphLocation(hadoopConfiguration.get(Constants.GREMLIN_HADOOP_INPUT_LOCATION),
                            fileSystemStorage)
                    .orElse(null);
            if (null != inputLocation) {
                try {
                    graphComputerConfiguration.setProperty(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath()
                                    .toString());
                    hadoopConfiguration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                            FileSystem.get(hadoopConfiguration).getFileStatus(new Path(inputLocation)).getPath()
                                    .toString());
                } catch (final IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }

        final InputRDD inputRDD;
        final OutputRDD outputRDD;
        final boolean filtered;
        try {
            inputRDD = InputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER,
                                    InputRDD.class, InputRDD.class).newInstance()
                            : InputFormatRDD.class.newInstance();
            outputRDD = OutputRDD.class.isAssignableFrom(
                    hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER, Object.class))
                            ? hadoopConfiguration.getClass(Constants.GREMLIN_HADOOP_GRAPH_WRITER,
                                    OutputRDD.class, OutputRDD.class).newInstance()
                            : OutputFormatRDD.class.newInstance();

            // if the input class can filter on load, then set the filters
            if (inputRDD instanceof InputFormatRDD
                    && GraphFilterAware.class.isAssignableFrom(hadoopConfiguration.getClass(
                            Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class))) {
                GraphFilterAware.storeGraphFilter(graphComputerConfiguration, hadoopConfiguration,
                        this.graphFilter);
                filtered = false;
            } else if (inputRDD instanceof GraphFilterAware) {
                ((GraphFilterAware) inputRDD).setGraphFilter(this.graphFilter);
                filtered = false;
            } else
                filtered = this.graphFilter.hasFilter();
        } catch (final InstantiationException | IllegalAccessException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }

        // create the spark context from the graph computer configuration
        final JavaSparkContext sparkContext = new JavaSparkContext(Spark.create(hadoopConfiguration));
        final Storage sparkContextStorage = SparkContextStorage.open();

        sparkContext.setJobGroup(jobGroupId, jobDescription);

        GraknSparkMemory memory = null;
        // delete output location
        final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
        if (null != outputLocation) {
            if (outputToHDFS && fileSystemStorage.exists(outputLocation)) {
                fileSystemStorage.rm(outputLocation);
            }
            if (outputToSpark && sparkContextStorage.exists(outputLocation)) {
                sparkContextStorage.rm(outputLocation);
            }
        }

        // the Spark application name will always be set by SparkContextStorage,
        // thus, INFO the name to make it easier to debug
        logger.debug(Constants.GREMLIN_HADOOP_SPARK_JOB_PREFIX
                + (null == this.vertexProgram ? "No VertexProgram" : this.vertexProgram) + "["
                + this.mapReducers + "]");

        // add the project jars to the cluster
        this.loadJars(hadoopConfiguration, sparkContext);
        updateLocalConfiguration(sparkContext, hadoopConfiguration);

        // create a message-passing friendly rdd from the input rdd
        boolean partitioned = false;
        JavaPairRDD<Object, VertexWritable> loadedGraphRDD = inputRDD.readGraphRDD(graphComputerConfiguration,
                sparkContext);

        // if there are vertex or edge filters, filter the loaded graph rdd prior to partitioning and persisting
        if (filtered) {
            this.logger.debug("Filtering the loaded graphRDD: " + this.graphFilter);
            loadedGraphRDD = GraknSparkExecutor.applyGraphFilter(loadedGraphRDD, this.graphFilter);
        }
        // if the loaded graph RDD is already partitioned use that partitioner,
        // else partition it with HashPartitioner
        if (loadedGraphRDD.partitioner().isPresent()) {
            this.logger.debug("Using the existing partitioner associated with the loaded graphRDD: "
                    + loadedGraphRDD.partitioner().get());
        } else {
            if (!skipPartitioner) {
                final Partitioner partitioner = new HashPartitioner(
                        this.workersSet ? this.workers : loadedGraphRDD.partitions().size());
                this.logger.debug("Partitioning the loaded graphRDD: " + partitioner);
                loadedGraphRDD = loadedGraphRDD.partitionBy(partitioner);
                partitioned = true;
                assert loadedGraphRDD.partitioner().isPresent();
            } else {
                // no easy way to test this with a test case
                assert skipPartitioner == !loadedGraphRDD.partitioner().isPresent();

                this.logger.debug("Partitioning has been skipped for the loaded graphRDD via "
                        + Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            }
        }
        // if the loaded graphRDD was already partitioned previously,
        // then this coalesce/repartition will not take place
        if (this.workersSet) {
            // ensures that the loaded graphRDD does not have more partitions than workers
            if (loadedGraphRDD.partitions().size() > this.workers) {
                loadedGraphRDD = loadedGraphRDD.coalesce(this.workers);
            } else {
                // ensures that the loaded graphRDD does not have less partitions than workers
                if (loadedGraphRDD.partitions().size() < this.workers) {
                    loadedGraphRDD = loadedGraphRDD.repartition(this.workers);
                }
            }
        }
        // persist the vertex program loaded graph as specified by configuration
        // or else use default cache() which is MEMORY_ONLY
        if (!skipPersist && (!inputFromSpark || partitioned || filtered)) {
            loadedGraphRDD = loadedGraphRDD.persist(StorageLevel.fromString(
                    hadoopConfiguration.get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
        }
        // final graph with view
        // (for persisting and/or mapReducing -- may be null and thus, possible to save space/time)
        JavaPairRDD<Object, VertexWritable> computedGraphRDD = null;
        try {
            ////////////////////////////////
            // process the vertex program //
            ////////////////////////////////
            if (null != this.vertexProgram) {
                memory = new GraknSparkMemory(this.vertexProgram, this.mapReducers, sparkContext);
                /////////////////
                // if there is a registered VertexProgramInterceptor, use it to bypass the GraphComputer semantics
                if (graphComputerConfiguration
                        .containsKey(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR)) {
                    try {
                        final GraknSparkVertexProgramInterceptor<VertexProgram> interceptor = (GraknSparkVertexProgramInterceptor) Class
                                .forName(graphComputerConfiguration
                                        .getString(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR))
                                .newInstance();
                        computedGraphRDD = interceptor.apply(this.vertexProgram, loadedGraphRDD, memory);
                    } catch (final ClassNotFoundException | IllegalAccessException | InstantiationException e) {
                        throw new IllegalStateException(e.getMessage());
                    }
                } else {
                    // standard GraphComputer semantics
                    // get a configuration that will be propagated to all workers
                    final HadoopConfiguration vertexProgramConfiguration = new HadoopConfiguration();
                    this.vertexProgram.storeState(vertexProgramConfiguration);
                    // set up the vertex program and wire up configurations
                    this.vertexProgram.setup(memory);
                    JavaPairRDD<Object, ViewIncomingPayload<Object>> viewIncomingRDD = null;
                    memory.broadcastMemory(sparkContext);
                    // execute the vertex program
                    while (true) {
                        if (Thread.interrupted()) {
                            sparkContext.cancelAllJobs();
                            throw new TraversalInterruptedException();
                        }
                        memory.setInExecute(true);
                        viewIncomingRDD = GraknSparkExecutor.executeVertexProgramIteration(loadedGraphRDD,
                                viewIncomingRDD, memory, graphComputerConfiguration,
                                vertexProgramConfiguration);
                        memory.setInExecute(false);
                        if (this.vertexProgram.terminate(memory)) {
                            break;
                        } else {
                            memory.incrIteration();
                            memory.broadcastMemory(sparkContext);
                        }
                    }
                    // if the graph will be continued to be used (persisted or mapreduced),
                    // then generate a view+graph
                    if ((null != outputRDD && !this.persist.equals(Persist.NOTHING))
                            || !this.mapReducers.isEmpty()) {
                        computedGraphRDD = GraknSparkExecutor.prepareFinalGraphRDD(loadedGraphRDD,
                                viewIncomingRDD, this.vertexProgram.getVertexComputeKeys());
                        assert null != computedGraphRDD && computedGraphRDD != loadedGraphRDD;
                    } else {
                        // ensure that the computedGraphRDD was not created
                        assert null == computedGraphRDD;
                    }
                }
                /////////////////
                memory.complete(); // drop all transient memory keys
                // write the computed graph to the respective output (rdd or output format)
                if (null != outputRDD && !this.persist.equals(Persist.NOTHING)) {
                    // the logic holds that a computedGraphRDD must be created at this point
                    assert null != computedGraphRDD;

                    outputRDD.writeGraphRDD(graphComputerConfiguration, computedGraphRDD);
                }
            }

            final boolean computedGraphCreated = computedGraphRDD != null && computedGraphRDD != loadedGraphRDD;
            if (!computedGraphCreated) {
                computedGraphRDD = loadedGraphRDD;
            }

            final Memory.Admin finalMemory = null == memory ? new MapMemory() : new MapMemory(memory);

            //////////////////////////////
            // process the map reducers //
            //////////////////////////////
            if (!this.mapReducers.isEmpty()) {
                // create a mapReduceRDD for executing the map reduce jobs on
                JavaPairRDD<Object, VertexWritable> mapReduceRDD = computedGraphRDD;
                if (computedGraphCreated && !outputToSpark) {
                    // drop all the edges of the graph as they are not used in mapReduce processing
                    mapReduceRDD = computedGraphRDD.mapValues(vertexWritable -> {
                        vertexWritable.get().dropEdges(Direction.BOTH);
                        return vertexWritable;
                    });
                    // if there is only one MapReduce to execute, don't bother wasting the clock cycles.
                    if (this.mapReducers.size() > 1) {
                        mapReduceRDD = mapReduceRDD.persist(StorageLevel.fromString(hadoopConfiguration
                                .get(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY")));
                    }
                }

                for (final MapReduce mapReduce : this.mapReducers) {
                    // execute the map reduce job
                    final HadoopConfiguration newApacheConfiguration = new HadoopConfiguration(
                            graphComputerConfiguration);
                    mapReduce.storeState(newApacheConfiguration);
                    // map
                    final JavaPairRDD mapRDD = GraknSparkExecutor.executeMap(mapReduceRDD, mapReduce,
                            newApacheConfiguration);
                    // combine
                    final JavaPairRDD combineRDD = mapReduce.doStage(MapReduce.Stage.COMBINE)
                            ? GraknSparkExecutor.executeCombine(mapRDD, newApacheConfiguration)
                            : mapRDD;
                    // reduce
                    final JavaPairRDD reduceRDD = mapReduce.doStage(MapReduce.Stage.REDUCE)
                            ? GraknSparkExecutor.executeReduce(combineRDD, mapReduce, newApacheConfiguration)
                            : combineRDD;
                    // write the map reduce output back to disk and computer result memory
                    if (null != outputRDD) {
                        mapReduce.addResultToMemory(finalMemory, outputRDD.writeMemoryRDD(
                                graphComputerConfiguration, mapReduce.getMemoryKey(), reduceRDD));
                    }
                }
                // if the mapReduceRDD is not simply the computed graph, unpersist the mapReduceRDD
                if (computedGraphCreated && !outputToSpark) {
                    assert loadedGraphRDD != computedGraphRDD;
                    assert mapReduceRDD != computedGraphRDD;
                    mapReduceRDD.unpersist();
                } else {
                    assert mapReduceRDD == computedGraphRDD;
                }
            }

            // unpersist the loaded graph if it will not be used again (no PersistedInputRDD)
            // if the graphRDD was loaded from Spark, but then partitioned or filtered, it's a different RDD
            if (!inputFromSpark || partitioned || filtered) {
                loadedGraphRDD.unpersist();
            }
            // unpersist the computed graph if it will not be used again (no PersistedOutputRDD)
            // if the computed graph is the loadedGraphRDD because it was not mutated and not-unpersisted,
            // then don't unpersist the computedGraphRDD/loadedGraphRDD
            if ((!outputToSpark || this.persist.equals(GraphComputer.Persist.NOTHING))
                    && computedGraphCreated) {
                computedGraphRDD.unpersist();
            }
            // delete any file system or rdd data if persist nothing
            if (null != outputLocation && this.persist.equals(GraphComputer.Persist.NOTHING)) {
                if (outputToHDFS) {
                    fileSystemStorage.rm(outputLocation);
                }
                if (outputToSpark) {
                    sparkContextStorage.rm(outputLocation);
                }
            }
            // update runtime and return the newly computed graph
            finalMemory.setRuntime(System.currentTimeMillis() - startTime);
            // clear properties that should not be propagated in an OLAP chain
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_GRAPH_FILTER);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_HADOOP_VERTEX_PROGRAM_INTERCEPTOR);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE);
            graphComputerConfiguration.clearProperty(Constants.GREMLIN_SPARK_SKIP_PARTITIONER);
            return new DefaultComputerResult(InputOutputHelper.getOutputGraph(graphComputerConfiguration,
                    this.resultGraph, this.persist), finalMemory.asImmutable());
        } catch (Exception e) {
            // So it throws the same exception as tinker does
            throw new RuntimeException(e);
        }
    });
    computerService.shutdown();
    return result;
}

From source file:alluxio.underfs.hdfs.HdfsUnderFileSystemTest.java

License:Apache License

/**
 * Tests the {@link HdfsUnderFileSystem#prepareConfiguration} method.
 *
 * Checks the HDFS implementation class and the Alluxio under-FS configuration setting.
 */
@Test
public void prepareConfiguration() throws Exception {
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    mMockHdfsUnderFileSystem.prepareConfiguration("", conf);
    Assert.assertEquals("org.apache.hadoop.hdfs.DistributedFileSystem", conf.get("fs.hdfs.impl"));
    Assert.assertFalse(conf.getBoolean("fs.hdfs.impl.disable.cache", false));
    Assert.assertNotNull(conf.get(PropertyKey.UNDERFS_HDFS_CONFIGURATION.toString()));
}
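
Because getBoolean returns the supplied default when a key is absent, the assertFalse above passes both when fs.hdfs.impl.disable.cache is explicitly "false" and when it is simply not set. A small sketch, using a made-up property name, of how a test can tell the two cases apart by also checking the raw string value:

import org.apache.hadoop.conf.Configuration;
import org.junit.Assert;
import org.junit.Test;

public class GetBooleanDefaultTest {
    @Test
    public void distinguishUnsetFromExplicitFalse() {
        // Pass false so no default resources (core-site.xml etc.) are loaded.
        Configuration conf = new Configuration(false);

        // Unset key: getBoolean falls back to the supplied default ...
        Assert.assertTrue(conf.getBoolean("example.cache.disabled", true));
        // ... and the raw value shows the key really is unset.
        Assert.assertNull(conf.get("example.cache.disabled"));

        // Explicitly set to false: the default is ignored.
        conf.setBoolean("example.cache.disabled", false);
        Assert.assertFalse(conf.getBoolean("example.cache.disabled", true));
        Assert.assertEquals("false", conf.get("example.cache.disabled"));
    }
}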

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
        keepTempFiles = conf.getBoolean(KEEP_TEMP_FILES, false);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:at.illecker.hama.hybrid.examples.testglobalgpusync.TestGlobalGpuSyncHybridBSP.java

License:Apache License

public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    boolean isDebugging = false;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 5) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            isDebugging = Boolean.parseBoolean(args[4]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=debug | Enable debugging (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean("hama.pipes.logging", isDebugging);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCK_SIZE, "" + blockSize);
    conf.set(CONF_GRID_SIZE, "" + gridSize);
    conf.set(CONF_TMP_DIR, TMP_DIR.toString());

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCK_SIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRID_SIZE));
    LOG.info("TempDir: " + conf.get(CONF_TMP_DIR));
    LOG.info("isDebugging: " + conf.getBoolean("hama.pipes.logging", false));

    BSPJob job = createTestGlobalGpuSyncHybridBSPConf(conf);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        printOutput(job, FileSystem.get(conf), new Path(conf.get(CONF_TMP_DIR)));
    }
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

@Override
public void setup(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer)
        throws IOException {

    Configuration conf = peer.getConfiguration();

    outCardinality = conf.getInt(OUT_CARD, Integer.MAX_VALUE);
    isDebuggingEnabled = conf.getBoolean(DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    this.masterTask = peer.getPeerName(peer.getNumPeers() / 2);

    // Init logging
    if (isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

            logger.writeChars("bsp,setup,outCardinality=" + outCardinality + "\n");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

@Override
public void setup(
        BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
        throws IOException {

    Configuration conf = peer.getConfiguration();
    m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);
    // TODO
    // task must be 0 otherwise write out does NOT work!
    m_masterTask = peer.getPeerName(0);

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Receive transposed Matrix B
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);

    IntWritable bKey = new IntWritable();
    PipesVectorWritable bVector = new PipesVectorWritable();
    // for each col of matrix B (cause by transposed B)
    while (reader.next(bKey, bVector)) {
        m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector()));
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value="
                    + bVector.getVector().toString() + "\n");
        }
    }
    reader.close();
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationBSPGpu.java

License:Apache License

@Override
public void setup(BSPPeer<IntWritable, VectorWritable, IntWritable, VectorWritable, NullWritable> peer)
        throws IOException {

    Configuration conf = peer.getConfiguration();
    m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);
    // TODO
    // task must be 0 otherwise write out does NOT work!
    m_masterTask = peer.getPeerName(0);

    this.m_blockSize = Integer.parseInt(peer.getConfiguration().get(CONF_BLOCKSIZE));

    this.m_gridSize = Integer.parseInt(peer.getConfiguration().get(CONF_GRIDSIZE));

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Load matrixB
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);

    List<DoubleVector> matrixB = new ArrayList<DoubleVector>();
    IntWritable bKey = new IntWritable();
    VectorWritable bVector = new VectorWritable();
    // for each row of matrix B
    while (reader.next(bKey, bVector)) {
        matrixB.add(bVector.getVector());

        if (m_isDebuggingEnabled) {
            m_logger.writeChars("bsp,setup,MatrixB (" + bKey.get() + "," + bVector.getVector() + ")\n");
        }
    }
    reader.close();

    // Convert matrixB to double array for GPU kernels
    m_matrixBArr = toArray(matrixB);

    if (m_isDebuggingEnabled) {
        for (int i = 0; i < m_matrixBArr.length; i++) {
            m_logger.writeChars("bsp,setup,MatrixBArr (" + i + "," + Arrays.toString(m_matrixBArr[i]) + ")\n");
        }
    }

    // threadSliceSize defines how many multipliers
    // of column B have to be multiplied with column A
    m_threadSliceSize = divup(m_matrixBArr.length, m_blockSize);

    // blockSliceSize defines the column slice size
    // (columns of B per block iteration)
    m_blockSliceSize = divup(m_matrixBArr[0].length, m_gridSize);

    if (m_isDebuggingEnabled) {
        m_logger.writeChars("bsp,setup,blockSize=" + m_blockSize + ",gridSize=" + m_gridSize
                + ",threadSliceSize=" + m_threadSliceSize + ",blockSliceSize=" + m_blockSliceSize + "\n");
    }
}