Example usage for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.getConf().

Prototype

@Override
public Configuration getConf()
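
Before the examples, here is a minimal, self-contained sketch of calling getConf(): the Configuration returned is the one the FileSystem instance was initialized with, which is why the examples below pass it on to helpers such as FileUtil.copy or SequenceFile.createWriter. The class name and the printed property are illustrative assumptions, not taken from any of the source files listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        // Build a FileSystem from a fresh Configuration (picks up core-site.xml etc. on the classpath).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() hands back the Configuration this FileSystem was created with,
        // so downstream utilities can reuse exactly the same settings.
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        fs.close();
    }
}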

Usage

From source file: io.prestosql.plugin.hive.BackgroundHiveSplitLoader.java

License: Apache License

private ListenableFuture<?> loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = (TupleDomain<HiveColumnHandle>) compactEffectivePredicate;

    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
    boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(),
            partition.getPartition());

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED,
                    "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        ListenableFuture<?> lastResult = COMPLETED_FUTURE;
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // the splits must be generated using the file system for the target path
            // get the configuration for the target path -- it may be a different hdfs instance
            FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath);
            JobConf targetJob = toJobConf(targetFilesystem.getConf());
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(targetFilesystem,
                    partitionName, inputFormat, schema, partitionKeys, effectivePredicate,
                    partition.getColumnCoercions(), Optional.empty(), isForceLocalScheduling(session),
                    s3SelectPushdownEnabled);
            lastResult = addSplitsToSource(targetSplits, splitFactory);
            if (stopped) {
                return COMPLETED_FUTURE;
            }
        }
        return lastResult;
    }

    Optional<BucketConversion> bucketConversion = Optional.empty();
    boolean bucketConversionRequiresWorkerParticipation = false;
    if (partition.getPartition().isPresent()) {
        Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage()
                .getBucketProperty();
        if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
            int readBucketCount = tableBucketInfo.get().getReadBucketCount();
            int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
            // Validation was done in HiveSplitManager#getPartitionMetadata.
            // Here, it's just trying to see if it needs the BucketConversion.
            if (readBucketCount != partitionBucketCount) {
                bucketConversion = Optional.of(new BucketConversion(readBucketCount, partitionBucketCount,
                        tableBucketInfo.get().getBucketColumns()));
                if (readBucketCount > partitionBucketCount) {
                    bucketConversionRequiresWorkerParticipation = true;
                }
            }
        }
    }
    InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(fs, partitionName, inputFormat, schema,
            partitionKeys, effectivePredicate, partition.getColumnCoercions(),
            bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(),
            isForceLocalScheduling(session), s3SelectPushdownEnabled);

    // To support custom input formats, we want to call getSplits()
    // on the input format to obtain file splits.
    if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED,
                    "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: "
                            + inputFormat.getClass().getSimpleName());
        }
        JobConf jobConf = toJobConf(configuration);
        FileInputFormat.setInputPaths(jobConf, path);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);

        return addSplitsToSource(splits, splitFactory);
    }

    // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
    if (tableBucketInfo.isPresent()) {
        return hiveSplitSource
                .addToQueue(getBucketedSplits(path, fs, splitFactory, tableBucketInfo.get(), bucketConversion));
    }

    // S3 Select pushdown works at the granularity of individual S3 objects,
    // therefore we must not split files when it is enabled.
    boolean splittable = getHeaderCount(schema) == 0 && getFooterCount(schema) == 0 && !s3SelectPushdownEnabled;
    fileIterators.addLast(createInternalHiveSplitIterator(path, fs, splitFactory, splittable));
    return COMPLETED_FUTURE;
}

From source file: io.spring.batch.workflow.configuration.WorkflowConfiguration.java

License: Apache License

@Bean
@StepScope
public Tasklet ingestTasklet(FileSystem fileSystem) {
    return new Tasklet() {
        @Override
        public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
            String inputDir = (String) chunkContext.getStepContext().getJobParameters().get("inputDir");
            String ingestedDir = "ingested";

            if (inputDir.endsWith("/")) {
                inputDir = inputDir.substring(0, inputDir.length() - 1);
            }

            String outputDir = inputDir + "/" + ingestedDir;

            System.out.println(">> INPUT DIR = " + inputDir + " OUTPUT DIR = " + outputDir);

            FileUtil.copyMerge(fileSystem, new Path(inputDir), fileSystem,
                    new Path(outputDir + "/ingested.out"), false, fileSystem.getConf(), "");
            return RepeatStatus.FINISHED;
        }
    };
}

From source file: io.spring.batch.workflow.configuration.WorkflowConfiguration.java

License: Apache License

@Bean
@StepScope
public Tasklet archiveTasklet(FileSystem fileSystem, @Value("#{jobParameters['inputDir']}") String inputDir,
        @Value("#{jobParameters['archiveDir']}") String archiveDir) {
    return new Tasklet() {
        @Override
        public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
            FileUtil.copy(fileSystem, new Path(inputDir), fileSystem, new Path("/probe/archive/" + archiveDir),
                    true, fileSystem.getConf());
            return RepeatStatus.FINISHED;
        }
    };
}

From source file: ivory.core.data.document.IntDocVectorsForwardIndex.java

License: Apache License

/**
 * Creates an {@code IntDocVectorsIndex} object.
 *
 * @param indexPath location of the index file
 * @param fs handle to the FileSystem
 * @param weighted {@code true} to load weighted document vectors
 * @throws IOException
 */
public IntDocVectorsForwardIndex(String indexPath, FileSystem fs, boolean weighted) throws IOException {
    this.fs = Preconditions.checkNotNull(fs);
    this.conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = (weighted ? env.getWeightedIntDocVectorsDirectory() : env.getIntDocVectorsDirectory());

    String forwardIndexPath = (weighted ? env.getWeightedIntDocVectorsForwardIndex()
            : env.getIntDocVectorsForwardIndex());
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();

    positions = new long[collectionDocumentCount];
    for (int i = 0; i < collectionDocumentCount; i++) {
        positions[i] = posInput.readLong();
    }
}

From source file: ivory.core.data.document.TermDocVectorsForwardIndex.java

License: Apache License

/**
 * Creates a {@code TermDocVectorsIndex} object.
 *
 * @param indexPath location of the index file
 * @param fs handle to the FileSystem
 * @throws IOException
 */
public TermDocVectorsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    Preconditions.checkNotNull(indexPath);
    this.fs = Preconditions.checkNotNull(fs);
    conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = env.getTermDocVectorsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getTermDocVectorsForwardIndex()));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();

    positions = new long[collectionDocumentCount];
    for (int i = 0; i < collectionDocumentCount; i++) {
        positions[i] = posInput.readLong();
    }
    posInput.close();
}

From source file: ivory.core.data.index.IntPostingsForwardIndex.java

License: Apache License

public IntPostingsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    this.fs = fs;
    this.conf = fs.getConf();
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    postingsPath = env.getPostingsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getPostingsIndexData()));

    int l = posInput.readInt();
    positions = new long[l];
    for (int i = 0; i < l; i++) {
        positions[i] = posInput.readLong();
    }
}

From source file: ivory.data.IntDocVectorsForwardIndex.java

License: Apache License

public IntDocVectorsForwardIndex(String indexPath, FileSystem fs, boolean weighted) throws IOException {
    mFs = fs;
    mConf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    mPath = (weighted ? env.getWeightedIntDocVectorsDirectory() : env.getIntDocVectorsDirectory());
    sLogger.debug("mPath: " + mPath);

    String forwardIndexPath = (weighted ? env.getWeightedIntDocVectorsForwardIndex()
            : env.getIntDocVectorsForwardIndex());
    sLogger.debug("forwardIndexPath: " + forwardIndexPath);
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    mDocnoOffset = posInput.readInt();
    mCollectionDocumentCount = posInput.readInt();

    mPositions = new long[mCollectionDocumentCount];
    for (int i = 0; i < mCollectionDocumentCount; i++) {
        mPositions[i] = posInput.readLong();
    }
}

From source file: ivory.data.IntPostingsForwardIndex.java

License: Apache License

public IntPostingsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    mFs = fs;
    conf = fs.getConf();
    postingsType = "ivory.data.PostingsListDocSortedPositional";
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    postingsPath = env.getPostingsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getPostingsIndexData()));

    int l = posInput.readInt();
    positions = new long[l];
    for (int i = 0; i < l; i++) {
        positions[i] = posInput.readLong();
        //sLogger.info(positions[i]);
    }
}

From source file: ivory.data.TermDocVectorsForwardIndex.java

License: Apache License

/**
 * Creates a <code>TermDocVectorsIndex</code> object.
 *
 * @param indexPath  location of the index file
 * @param fs         handle to the FileSystem
 * @throws IOException
 */
public TermDocVectorsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    this.fs = Preconditions.checkNotNull(fs);
    conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = env.getTermDocVectorsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getTermDocVectorsForwardIndex()));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();

    positions = new long[collectionDocumentCount];
    for (int i = 0; i < collectionDocumentCount; i++) {
        positions[i] = posInput.readLong();
    }
    posInput.close();
}

From source file: nthu.scopelab.stsqr.ssvd.SSVDRunner.java

License: Apache License

public void run() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    String btPath = outputPath + "/BtJob/";

    if (overwrite) {
        fs.delete(new Path(outputPath), true);
    }
    //QJob    
    int[] iseed = { 0, 0, 0, 1 };
    double[] x = new double[1];
    Dlarnv.dlarnv(2, iseed, 0, 1, x, 0);
    long seed = (long) (x[0] * (double) Long.MAX_VALUE);

    ssvdQRJob qrjob = new ssvdQRJob(sc, Ardd, seed, k, p, redSche);
    qrjob.run();
    JavaPairRDD<Long, sLMatrixWritable> Qrdd = qrjob.getQrdd().cache();
    //BtJob
    BtJob btjob = new BtJob(sc, Ardd, Qrdd, k, p, outerBlockHeight, btPath);
    btjob.run();
    cmUpperTriangDenseMatrix bbt = btjob.getBBt();

    //removed ABtDense iteration part temporarily

    // convert bbt to something our eigensolver could understand
    assert bbt.numColumns() == k + p;

    double[][] bbtSquare = new double[k + p][];
    for (int i = 0; i < k + p; i++) {
        bbtSquare[i] = new double[k + p];
    }

    for (int i = 0; i < k + p; i++) {
        for (int j = i; j < k + p; j++) {
            bbtSquare[i][j] = bbtSquare[j][i] = bbt.get(i, j);
        }
    }
    double[] svalues = new double[k + p];

    // try something else.
    EigenSolver eigenWrapper = new EigenSolver(bbtSquare);

    double[] eigenva2 = eigenWrapper.getWR();
    for (int i = 0; i < k + p; i++) {
        svalues[i] = Math.sqrt(eigenva2[i]); // sqrt?
    }
    // save/redistribute UHat
    double[][] uHat = eigenWrapper.getVL();

    //uHat and svalues are necessary parameters for UJob and VJob
    cmDenseMatrix uHatMat = new cmDenseMatrix(uHat);
    svaluesVector = new DenseVector(svalues);
    //UJob
    UJob ujob = new UJob(Qrdd, uHatMat, svaluesVector, k, p, cUHalfSigma);
    ujob.run();
    //VJob
    //read Btrdd
    JavaPairRDD<IntWritable, VectorWritable> BtrddSeq = sc.hadoopFile(btPath, SequenceFileInputFormat.class,
            IntWritable.class, VectorWritable.class);
    VJob vjob = new VJob(BtrddSeq, uHatMat, svaluesVector, k, p, vrbs, cVHalfSigma);
    vjob.run();

    //output Urdd and Vrdd
    ujob.getUrdd().saveAsHadoopFile(uPath, LongWritable.class, sLMatrixWritable.class,
            SequenceFileOutputFormat.class);
    vjob.getVrdd().saveAsHadoopFile(vPath, LongWritable.class, sLMatrixWritable.class,
            SequenceFileOutputFormat.class);
    //output sigma
    SequenceFile.Writer svWriter = SequenceFile.createWriter(fs, fs.getConf(),
            new Path(sigmaPath + "/svalues.seq"), IntWritable.class, VectorWritable.class);

    svWriter.append(new IntWritable(0), new VectorWritable(new DenseVector(svalues, true)));

    svWriter.close();
}