List of usage examples for org.apache.hadoop.fs.FileSystem.getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
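Before the project examples below, here is a minimal standalone sketch of the call itself. It is not taken from any of the listed projects; the class name GetLocalExample and the path /tmp/getlocal-example.txt are purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // getLocal returns the FileSystem implementation backed by the local disk,
        // independent of whatever fs.defaultFS points the cluster at
        LocalFileSystem local = FileSystem.getLocal(conf);
        Path path = new Path("/tmp/getlocal-example.txt"); // illustrative path
        try (FSDataOutputStream out = local.create(path, true)) {
            out.writeUTF("written through the local file system");
        }
        System.out.println("exists: " + local.exists(path));
    }
}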
From source file:com.hazelcast.jet.hadoop.impl.ReadHdfsPTest.java
License:Open Source License
private void writeToFile() throws IOException {
    Configuration conf = new Configuration();
    LocalFileSystem local = FileSystem.getLocal(conf);
    IntStream.range(0, 4).mapToObj(i -> createPath()).forEach(path -> uncheckRun(() -> {
        paths.add(path);
        if (SequenceFileInputFormat.class.equals(inputFormatClass)) {
            writeToSequenceFile(conf, path);
        } else {
            writeToTextFile(local, path);
        }
    }));
}
From source file:com.hazelcast.jet.impl.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
private void writeToFile() throws IOException {
    Configuration conf = new Configuration();
    LocalFileSystem local = FileSystem.getLocal(conf);
    IntStream.range(0, 4).mapToObj(this::createPath).forEach(path -> uncheckRun(() -> {
        paths.add(path);
        if (SequenceFileInputFormat.class.equals(inputFormatClass)) {
            writeToSequenceFile(conf, path);
        } else {
            writeToTextFile(local, path);
        }
    }));
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlock.java
License:Open Source License
/**
 * @param fname
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @return
 * @throws IOException
 * @throws DMLRuntimeException
 */
public ArrayList<IndexedMatrixValue> readIndexedMatrixBlocksFromHDFS(String fname, long rlen, long clen,
        int brlen, int bclen) throws IOException, DMLRuntimeException {
    //allocate output matrix block collection
    ArrayList<IndexedMatrixValue> ret = new ArrayList<IndexedMatrixValue>();

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixBlocksFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParallel.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block (incl block allocation for parallel)
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, true, true);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    if (!AGGREGATE_BLOCK_NNZ)
        ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.io.ReaderBinaryBlockParFiles.java
License:Open Source License
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //allocate output matrix block
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    //prepare file access
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    Path path = new Path((_localFS ? "file:///" : "") + fname);

    //check existence and non-empty file
    checkValidInputFile(fs, path);

    //core read
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    //finally check if change of sparse/dense block representation required
    ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MMCJMRInputCache.java
License:Open Source License
public MMCJMRInputCache(JobConf conf, long memSize, long rlen, long clen, int brlen, int bclen,
        boolean leftCached, Class<? extends MatrixValue> valueClass)
        throws IOException, IllegalAccessException, InstantiationException {
    //matrix block, matrix index, pair, integer in the linked list
    long elementSize = 77 + 8 * Math.min(rlen, brlen) * Math.min(clen, bclen) + 20 + 12 + 12 + 4;
    long numRowBlocks = leftCached ? (long) Math.ceil((double) rlen / (double) brlen) : 1;
    long numColBlocks = leftCached ? 1 : (long) Math.ceil((double) clen / (double) bclen);

    int buffCapacity = (int) Math.max(Math.min((memSize / elementSize), (numRowBlocks * numColBlocks)), 1);
    super.allocateBuffer(buffCapacity, valueClass, false);

    //local file management (if necessary)
    int n = (int) Math.ceil((double) (numRowBlocks * numColBlocks) / (double) _bufferCapacity);
    memOnly = (n == 1);
    if (!memOnly) {
        _job = conf;
        _fs = FileSystem.getLocal(_job);
        _fileN = n;
        super.constructLocalFilePrefix("_input_cache_");
        super.deleteAllWorkingFiles();
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.PartialAggregator.java
License:Open Source License
/**
 * @param conf
 * @param memSize
 * @param resultRlen
 * @param resultClen
 * @param blockRlen
 * @param blockClen
 * @param inRowMajor
 * @param op
 * @param vCls
 * @throws InstantiationException
 * @throws IllegalAccessException
 * @throws IOException
 */
public PartialAggregator(JobConf conf, long memSize, long resultRlen, long resultClen, int blockRlen,
        int blockClen, boolean inRowMajor, AggregateBinaryOperator op, Class<? extends MatrixValue> vCls)
        throws InstantiationException, IllegalAccessException, IOException {
    rlen = resultRlen;
    clen = resultClen;
    brlen = blockRlen;
    bclen = blockClen;
    numBlocksInRow = (long) Math.ceil((double) clen / (double) bclen);
    numBlocksInColumn = (long) Math.ceil((double) rlen / (double) brlen);
    operation = op;

    //allocate space for buffer
    //if the buffer space is already larger than the result size, don't need extra space
    //matrix block, matrix index, pair, integer in the linked list
    long elementSize = 77 + 8 * Math.min(rlen, brlen) * Math.min(clen, bclen) + 20 + 12 + 12 + 4;
    int buffCapacity = (int) Math.max(Math.min((memSize / elementSize), (numBlocksInRow * numBlocksInColumn)), 1);
    super.allocateBuffer(buffCapacity, vCls, true);

    //local file management (if necessary)
    int n = (int) Math.ceil((double) (numBlocksInRow * numBlocksInColumn) / (double) _bufferCapacity);
    memOnly = (n == 1);
    if (!memOnly) {
        _job = conf;
        _fs = FileSystem.getLocal(_job);
        rowMajor = inRowMajor;
        _fileN = n;
        super.constructLocalFilePrefix("_partial_aggregator_");
        super.deleteAllWorkingFiles();
    }
}
From source file:com.ibm.bi.dml.runtime.transform.ApplyTfHelper.java
License:Open Source License
public void loadTfMetadata(JSONObject spec) throws IOException {
    Path txMtdDir = (DistributedCache.getLocalCacheFiles(_rJob))[0];
    FileSystem localFS = FileSystem.getLocal(_rJob);

    // load transformation metadata
    _mia.loadTxMtd(_rJob, localFS, txMtdDir);
    _ra.loadTxMtd(_rJob, localFS, txMtdDir);
    _ba.loadTxMtd(_rJob, localFS, txMtdDir);

    // associate recode maps and bin definitions with dummycoding agent,
    // as recoded and binned columns are typically dummycoded
    _da.setRecodeMaps(_ra.getRecodeMaps());
    _da.setNumBins(_ba.getBinList(), _ba.getNumBins());
    _da.loadTxMtd(_rJob, localFS, txMtdDir);

    FileSystem fs;
    fs = FileSystem.get(_rJob);
    Path thisPath = new Path(_rJob.get("map.input.file")).makeQualified(fs);
    String thisfile = thisPath.toString();

    Path smallestFilePath = new Path(_rJob.get(MRJobConfiguration.TF_SMALLEST_FILE)).makeQualified(fs);
    if (thisfile.toString().equals(smallestFilePath.toString()))
        _partFileWithHeader = true;
    else
        _partFileWithHeader = false;
}
From source file:com.ibm.bi.dml.runtime.transform.TfUtils.java
License:Open Source License
public void loadTfMetadata(JobConf job, boolean fromLocalFS) throws IOException {
    Path tfMtdDir = null;
    FileSystem fs = null;

    if (fromLocalFS) {
        // metadata must be read from local file system (e.g., distributed cache in the case of Hadoop)
        tfMtdDir = (DistributedCache.getLocalCacheFiles(job))[0];
        fs = FileSystem.getLocal(job);
    } else {
        fs = FileSystem.get(job);
        tfMtdDir = new Path(getTfMtdDir());
    }

    // load transformation metadata
    getMVImputeAgent().loadTxMtd(job, fs, tfMtdDir, this);
    getRecodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
    getBinAgent().loadTxMtd(job, fs, tfMtdDir, this);

    // associate recode maps and bin definitions with dummycoding agent,
    // as recoded and binned columns are typically dummycoded
    getDummycodeAgent().setRecodeMaps(getRecodeAgent().getRecodeMaps());
    getDummycodeAgent().setNumBins(getBinAgent().getBinList(), getBinAgent().getNumBins());
    getDummycodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
}