List of usage examples for org.apache.hadoop.fs FileSystem getConf
@Override
public Configuration getConf()
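All of the examples on this page share the same basic pattern: obtain a FileSystem handle and call getConf() to read back the Configuration it was created with, so that later Hadoop calls can reuse the same settings. The following minimal sketch shows the call in isolation; the class name, probe path, and printed property are illustrative assumptions, not taken from any of the source files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        // Build a Configuration and obtain the FileSystem it describes.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() returns the Configuration backing this FileSystem instance,
        // so downstream code (JobConf, FileUtil, SequenceFile.Writer, ...) can
        // reuse the same settings without passing conf through every call site.
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        // Illustrative path, used only to show the handle is still usable afterwards.
        Path probe = new Path("/tmp/getconf-example");
        System.out.println(probe + " exists: " + fs.exists(probe));
    }
}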
From source file:io.prestosql.plugin.hive.BackgroundHiveSplitLoader.java
License:Apache License
private ListenableFuture<?> loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = (TupleDomain<HiveColumnHandle>) compactEffectivePredicate;
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
    boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        ListenableFuture<?> lastResult = COMPLETED_FUTURE;
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // the splits must be generated using the file system for the target path
            // get the configuration for the target path -- it may be a different hdfs instance
            FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath);
            JobConf targetJob = toJobConf(targetFilesystem.getConf());
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(targetFilesystem, partitionName,
                    inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions(),
                    Optional.empty(), isForceLocalScheduling(session), s3SelectPushdownEnabled);
            lastResult = addSplitsToSource(targetSplits, splitFactory);
            if (stopped) {
                return COMPLETED_FUTURE;
            }
        }
        return lastResult;
    }

    Optional<BucketConversion> bucketConversion = Optional.empty();
    boolean bucketConversionRequiresWorkerParticipation = false;
    if (partition.getPartition().isPresent()) {
        Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage()
                .getBucketProperty();
        if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
            int readBucketCount = tableBucketInfo.get().getReadBucketCount();
            int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
            // Validation was done in HiveSplitManager#getPartitionMetadata.
            // Here, it's just trying to see if it needs the BucketConversion.
            if (readBucketCount != partitionBucketCount) {
                bucketConversion = Optional.of(new BucketConversion(readBucketCount, partitionBucketCount,
                        tableBucketInfo.get().getBucketColumns()));
                if (readBucketCount > partitionBucketCount) {
                    bucketConversionRequiresWorkerParticipation = true;
                }
            }
        }
    }

    InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(fs, partitionName, inputFormat, schema,
            partitionKeys, effectivePredicate, partition.getColumnCoercions(),
            bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(),
            isForceLocalScheduling(session), s3SelectPushdownEnabled);

    // To support custom input formats, we want to call getSplits()
    // on the input format to obtain file splits.
    if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED,
                    "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: "
                            + inputFormat.getClass().getSimpleName());
        }
        JobConf jobConf = toJobConf(configuration);
        FileInputFormat.setInputPaths(jobConf, path);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        return addSplitsToSource(splits, splitFactory);
    }

    // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
    if (tableBucketInfo.isPresent()) {
        return hiveSplitSource
                .addToQueue(getBucketedSplits(path, fs, splitFactory, tableBucketInfo.get(), bucketConversion));
    }

    // S3 Select pushdown works at the granularity of individual S3 objects,
    // therefore we must not split files when it is enabled.
    boolean splittable = getHeaderCount(schema) == 0 && getFooterCount(schema) == 0 && !s3SelectPushdownEnabled;
    fileIterators.addLast(createInternalHiveSplitIterator(path, fs, splitFactory, splittable));
    return COMPLETED_FUTURE;
}
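The SymlinkTextInputFormat branch above is where getConf() appears: the configuration of the target path's FileSystem is turned into a JobConf so the target's TextInputFormat can compute splits. toJobConf is a Presto-internal helper; the following is a minimal sketch of the same conversion using only stock Hadoop classes, with the class and method names invented for illustration.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitsFromTargetFs {
    public static InputSplit[] splitsFor(FileSystem targetFs, Path targetPath) throws Exception {
        // Wrap the target FileSystem's configuration in a JobConf -- roughly what toJobConf does.
        JobConf job = new JobConf(targetFs.getConf());
        job.setInputFormat(TextInputFormat.class);

        TextInputFormat format = new TextInputFormat();
        format.configure(job);

        FileInputFormat.setInputPaths(job, targetPath);
        return format.getSplits(job, 0); // numSplits hint of 0: let the format decide
    }
}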
From source file:io.spring.batch.workflow.configuration.WorkflowConfiguration.java
License:Apache License
@Bean
@StepScope
public Tasklet ingestTasklet(FileSystem fileSystem) {
    return new Tasklet() {
        @Override
        public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
            String inputDir = (String) chunkContext.getStepContext().getJobParameters().get("inputDir");
            String ingestedDir = "ingested";
            if (inputDir.endsWith("/")) {
                inputDir = inputDir.substring(0, inputDir.length() - 1);
            }
            String outputDir = inputDir + "ingested";
            System.out.println(">> INPUT DIR = " + inputDir + " OUTPUT DIR = " + outputDir);

            FileUtil.copyMerge(fileSystem, new Path(inputDir), fileSystem, new Path(outputDir + "/ingested.out"),
                    false, fileSystem.getConf(), "");

            return RepeatStatus.FINISHED;
        }
    };
}
From source file:io.spring.batch.workflow.configuration.WorkflowConfiguration.java
License:Apache License
@Bean
@StepScope
public Tasklet archiveTasklet(FileSystem fileSystem, @Value("#{jobParameters['inputDir']}") String inputDir,
        @Value("#{jobParameters['archiveDir']}") String archiveDir) {
    return new Tasklet() {
        @Override
        public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
            FileUtil.copy(fileSystem, new Path(inputDir), fileSystem, new Path("/probe/archive/" + archiveDir),
                    true, fileSystem.getConf());

            return RepeatStatus.FINISHED;
        }
    };
}
From source file:ivory.core.data.document.IntDocVectorsForwardIndex.java
License:Apache License
/**
 * Creates an {@code IntDocVectorsIndex} object.
 *
 * @param indexPath location of the index file
 * @param fs handle to the FileSystem
 * @param weighted {@code true} to load weighted document vectors
 * @throws IOException
 */
public IntDocVectorsForwardIndex(String indexPath, FileSystem fs, boolean weighted) throws IOException {
    this.fs = Preconditions.checkNotNull(fs);
    this.conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = (weighted ? env.getWeightedIntDocVectorsDirectory() : env.getIntDocVectorsDirectory());

    String forwardIndexPath = (weighted ? env.getWeightedIntDocVectorsForwardIndex()
            : env.getIntDocVectorsForwardIndex());
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();
    positions = new long[collectionDocumentCount];
    for (int i = 0; i < collectionDocumentCount; i++) {
        positions[i] = posInput.readLong();
    }
}
From source file:ivory.core.data.document.TermDocVectorsForwardIndex.java
License:Apache License
/**
 * Creates a {@code TermDocVectorsIndex} object.
 *
 * @param indexPath location of the index file
 * @param fs handle to the FileSystem
 * @throws IOException
 */
public TermDocVectorsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    Preconditions.checkNotNull(indexPath);
    this.fs = Preconditions.checkNotNull(fs);
    conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = env.getTermDocVectorsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getTermDocVectorsForwardIndex()));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();
    positions = new long[collectionDocumentCount];
    for (int i = 0; i < collectionDocumentCount; i++) {
        positions[i] = posInput.readLong();
    }
    posInput.close();
}
From source file:ivory.core.data.index.IntPostingsForwardIndex.java
License:Apache License
public IntPostingsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    this.fs = fs;
    this.conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    postingsPath = env.getPostingsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getPostingsIndexData()));

    int l = posInput.readInt();
    positions = new long[l];
    for (int i = 0; i < l; i++) {
        positions[i] = posInput.readLong();
    }
}
From source file:ivory.data.IntDocVectorsForwardIndex.java
License:Apache License
public IntDocVectorsForwardIndex(String indexPath, FileSystem fs, boolean weighted) throws IOException {
    mFs = fs;
    mConf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    mPath = (weighted ? env.getWeightedIntDocVectorsDirectory() : env.getIntDocVectorsDirectory());
    sLogger.debug("mPath: " + mPath);

    String forwardIndexPath = (weighted ? env.getWeightedIntDocVectorsForwardIndex()
            : env.getIntDocVectorsForwardIndex());
    sLogger.debug("forwardIndexPath: " + forwardIndexPath);
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    mDocnoOffset = posInput.readInt();
    mCollectionDocumentCount = posInput.readInt();
    mPositions = new long[mCollectionDocumentCount];
    for (int i = 0; i < mCollectionDocumentCount; i++) {
        mPositions[i] = posInput.readLong();
    }
}
From source file:ivory.data.IntPostingsForwardIndex.java
License:Apache License
public IntPostingsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    mFs = fs;
    conf = fs.getConf();
    postingsType = "ivory.data.PostingsListDocSortedPositional";

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    postingsPath = env.getPostingsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getPostingsIndexData()));

    int l = posInput.readInt();
    positions = new long[l];
    for (int i = 0; i < l; i++) {
        positions[i] = posInput.readLong();
        //sLogger.info(positions[i]);
    }
}
From source file:ivory.data.TermDocVectorsForwardIndex.java
License:Apache License
/**
 * Creates a <code>TermDocVectorsIndex</code> object.
 *
 * @param indexPath location of the index file
 * @param fs handle to the FileSystem
 * @throws IOException
 */
public TermDocVectorsForwardIndex(String indexPath, FileSystem fs) throws IOException {
    this.fs = Preconditions.checkNotNull(fs);
    conf = fs.getConf();

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    path = env.getTermDocVectorsDirectory();

    FSDataInputStream posInput = fs.open(new Path(env.getTermDocVectorsForwardIndex()));

    docnoOffset = posInput.readInt();
    collectionDocumentCount = posInput.readInt();
    positions = new long[collectionDocumentCount];
    for (int i = 0; i < collectionDocumentCount; i++) {
        positions[i] = posInput.readLong();
    }
    posInput.close();
}
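The four ivory constructors above follow the same idiom: cache fs.getConf() in a field at construction time, then read the forward-index positions through the same FileSystem. The following is a condensed, hypothetical sketch of that shared pattern; the class and field names are invented for illustration and do not appear in the ivory sources.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ForwardIndexSketch {
    private final FileSystem fs;
    private final Configuration conf;
    private final long[] positions;

    public ForwardIndexSketch(FileSystem fs, Path forwardIndexPath) throws IOException {
        this.fs = fs;
        // Keep the FileSystem's own configuration around for later reads.
        this.conf = fs.getConf();

        try (FSDataInputStream in = fs.open(forwardIndexPath)) {
            int count = in.readInt();
            positions = new long[count];
            for (int i = 0; i < count; i++) {
                positions[i] = in.readLong();
            }
        }
    }
}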
From source file:nthu.scopelab.stsqr.ssvd.SSVDRunner.java
License:Apache License
public void run() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    String btPath = outputPath + "/BtJob/";
    if (overwrite) {
        fs.delete(new Path(outputPath), true);
    }

    // QJob
    int[] iseed = { 0, 0, 0, 1 };
    double[] x = new double[1];
    Dlarnv.dlarnv(2, iseed, 0, 1, x, 0);
    long seed = (long) (x[0] * (double) Long.MAX_VALUE);

    ssvdQRJob qrjob = new ssvdQRJob(sc, Ardd, seed, k, p, redSche);
    qrjob.run();
    JavaPairRDD<Long, sLMatrixWritable> Qrdd = qrjob.getQrdd().cache();

    // BtJob
    BtJob btjob = new BtJob(sc, Ardd, Qrdd, k, p, outerBlockHeight, btPath);
    btjob.run();
    cmUpperTriangDenseMatrix bbt = btjob.getBBt();

    // removed ABtDense iteration part temporarily

    // convert bbt to something our eigensolver could understand
    assert bbt.numColumns() == k + p;

    double[][] bbtSquare = new double[k + p][];
    for (int i = 0; i < k + p; i++) {
        bbtSquare[i] = new double[k + p];
    }
    for (int i = 0; i < k + p; i++) {
        for (int j = i; j < k + p; j++) {
            bbtSquare[i][j] = bbtSquare[j][i] = bbt.get(i, j);
        }
    }

    double[] svalues = new double[k + p];

    // try something else.
    EigenSolver eigenWrapper = new EigenSolver(bbtSquare);
    double[] eigenva2 = eigenWrapper.getWR();
    for (int i = 0; i < k + p; i++) {
        svalues[i] = Math.sqrt(eigenva2[i]); // sqrt?
    }

    // save/redistribute UHat
    double[][] uHat = eigenWrapper.getVL();

    // uHat and svalues are necessary parameters for UJob and VJob
    cmDenseMatrix uHatMat = new cmDenseMatrix(uHat);
    svaluesVector = new DenseVector(svalues);

    // UJob
    UJob ujob = new UJob(Qrdd, uHatMat, svaluesVector, k, p, cUHalfSigma);
    ujob.run();

    // VJob
    // read Btrdd
    JavaPairRDD<IntWritable, VectorWritable> BtrddSeq = sc.hadoopFile(btPath, SequenceFileInputFormat.class,
            IntWritable.class, VectorWritable.class);
    VJob vjob = new VJob(BtrddSeq, uHatMat, svaluesVector, k, p, vrbs, cVHalfSigma);
    vjob.run();

    // output Urdd and Vrdd
    ujob.getUrdd().saveAsHadoopFile(uPath, LongWritable.class, sLMatrixWritable.class,
            SequenceFileOutputFormat.class);
    vjob.getVrdd().saveAsHadoopFile(vPath, LongWritable.class, sLMatrixWritable.class,
            SequenceFileOutputFormat.class);

    // output sigma
    SequenceFile.Writer svWriter = SequenceFile.createWriter(fs, fs.getConf(),
            new Path(sigmaPath + "/svalues.seq"), IntWritable.class, VectorWritable.class);
    svWriter.append(new IntWritable(0), new VectorWritable(new DenseVector(svalues, true)));
    svWriter.close();
}