List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
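Before the project examples below, here is a minimal, self-contained sketch of the basic call pattern; the HDFS URI and file name are hypothetical placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; getFileSystem() resolves the FileSystem that owns
        // this path from the path's scheme (hdfs://, file://, ...) and the
        // given Configuration.
        Path path = new Path("hdfs://localhost:9000/tmp/example.txt");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);

        // Use the resolved FileSystem to write the file and check it exists.
        try (FSDataOutputStream out = fs.create(path, true)) {
            out.writeUTF("hello");
        }
        System.out.println("exists: " + fs.exists(path));
    }
}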
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent());

    // Multiply the matrices
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param tileWidth tile width used by the multiplication kernel
 * @param isDebugging whether to enable debug output
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, int tileWidth,
        boolean isDebugging) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent(), tileWidth, isDebugging);

    // Multiply the matrices
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java
License:Apache License
@Override
public boolean load(String path, boolean lazy) {
    this.m_isLazyLoadModel = lazy;
    this.m_modelPath = path;

    if (!lazy) {
        Path dataPath = new Path(m_modelPath);
        Configuration conf = new Configuration();
        try {
            FileSystem fs = dataPath.getFileSystem(conf);
            LinkedList<Path> files = new LinkedList<Path>();
            if (!fs.exists(dataPath)) {
                this.m_isLazyLoadModel = false;
                this.m_modelPath = null;
                return false;
            }
            if (!fs.isFile(dataPath)) {
                for (int i = 0; i < 100000; i++) {
                    Path partFile = new Path(
                            m_modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6));
                    if (fs.exists(partFile)) {
                        files.add(partFile);
                    } else {
                        break;
                    }
                }
            } else {
                files.add(dataPath);
            }

            LOG.info("loading model from " + path);
            for (Path file : files) {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
                Text key = new Text();
                PipesVectorWritable value = new PipesVectorWritable();
                String strKey = null;
                Long actualKey = null;
                String firstSymbol = null;

                while (reader.next(key, value)) {
                    strKey = key.toString();
                    firstSymbol = strKey.substring(0, 1);
                    try {
                        actualKey = Long.valueOf(strKey.substring(1));
                    } catch (Exception e) {
                        actualKey = Long.valueOf(0);
                    }

                    if (firstSymbol.equals(OnlineCF.DFLT_MODEL_ITEM_DELIM)) {
                        // LOG.info("loaded itemId: " + actualKey + " itemVector: "
                        // + value.getVector());
                        m_modelItemFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else if (firstSymbol.equals(OnlineCF.DFLT_MODEL_USER_DELIM)) {
                        // LOG.info("loaded userId: " + actualKey + " userVector: "
                        // + value.getVector());
                        m_modelUserFactorizedValues.put(actualKey, new PipesVectorWritable(value));
                    } else {
                        // unknown key prefix
                        continue;
                    }
                }
                reader.close();
            }

            LOG.info("loaded: " + m_modelUserFactorizedValues.size() + " users, "
                    + m_modelItemFactorizedValues.size() + " items");

            // for (Long user : m_modelUserFactorizedValues.keySet()) {
            //   LOG.info("userId: " + user + " userVector: "
            //       + m_modelUserFactorizedValues.get(user));
            // }
            // for (Long item : m_modelItemFactorizedValues.keySet()) {
            //   LOG.info("itemId: " + item + " itemVector: "
            //       + m_modelItemFactorizedValues.get(item));
            // }

        } catch (Exception e) {
            e.printStackTrace();
            this.m_isLazyLoadModel = false;
            this.m_modelPath = null;
            return false;
        }
    }
    return true;
}
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, int n, int maxVal) throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create one input file writer per BSP task
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, IntWritable.class,
                CompressionType.NONE);
    }

    // Write random values to the input files
    IntWritable key = new IntWritable();
    IntWritable value = new IntWritable();
    Random r = new Random();
    for (int i = 0; i < n; i++) {
        key.set(i);
        value.set(r.nextInt(maxVal));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(key, value);
        }
    }

    // Close the file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
}
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
static void printOutput(BSPJob job, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    FileStatus[] files = fs.listStatus(path);
    for (int i = 0; i < files.length; i++) {
        if (files[i].getLen() > 0) {
            System.out.println("File " + files[i].getPath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, files[i].getPath(), job.getConfiguration());
                NullWritable key = NullWritable.get();
                IntWritable value = new IntWritable();
                while (reader.next(key, value)) {
                    System.out.println("key: '" + key + "' value: '" + value.get() + "'\n");
                }
            } catch (IOException e) {
                // Not a SequenceFile; dump the raw bytes instead
                FSDataInputStream in = fs.open(files[i].getPath());
                IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                in.close();
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @param transposeMatrixA whether to transpose matrix A in a separate job first
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean transposeMatrixA) throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (transposeMatrixA) {
        transposed = transposed.transpose();
    }

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath.getParent(), other.numCols);
    }

    // Multiply the matrix with the transposed one
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
/**
 * Implements matrix multiplication A * B as a BSP job on CPU or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, boolean useGPU)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration initialConf = (getConf() == null) ? new HamaConfiguration() : getConf();

    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    BSPJob job = null;
    if (!useGPU) {
        job = MatrixMultiplicationBSPCpu.createMatrixMultiplicationBSPCpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    } else { // use GPU
        job = MatrixMultiplicationBSPGpu.createMatrixMultiplicationBSPGpuConf(initialConf, this.rowPath,
                other.rowPath, outPath.getParent());
    }

    // Multiply the matrices
    if (job.waitForCompletion(true)) {
        // Rename the result file to the output path
        Configuration conf = job.getConfiguration();
        FileSystem fs = outPath.getFileSystem(conf);
        FileStatus[] files = fs.listStatus(outPath.getParent());
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getPath().getName().startsWith("part-")) && (files[i].getLen() > 97)) {
                fs.rename(files[i].getPath(), outPath);
                break;
            }
        }
        // Read the resulting matrix from HDFS
        DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
                other.numCols());
        out.setConf(conf);
        return out;
    }
    return null;
}
From source file:authordetect.input.SingleBookReader.java
/**
 * @param inputSplit the split that defines the section of input to read
 * @param context the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();

    // Get the option from the configuration:
    // 0 for group by author, 1 for group by book
    int option = configuration.getInt("GROUP_OPTION", 0);

    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    // Initialize the start and end points of this split; if the split does not
    // begin at offset 0, skip the first (partial) line.
    start = split.getStart();
    end = start + split.getLength();
    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    start += lineReader.readLine(currentLine);

    prepareToScanBook(option);
}
From source file:azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java
License:Apache License
private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}
From source file:azkaban.jobtype.connectors.TeradataToHdfsJobRunnerMain.java
License:Apache License
private void runCopyTdToHdfs() throws IOException {
    if (Boolean.valueOf(_jobProps.getProperty("force.output.overwrite", "false").trim())) {
        Path path = new Path(_jobProps.getProperty(TdchConstants.TARGET_HDFS_PATH_KEY));
        _logger.info("Deleting output directory " + path.toUri());
        JobConf conf = new JobConf();
        path.getFileSystem(conf).delete(path, true);
    }
    _logger.info(String.format("Executing %s with params: %s",
            TeradataToHdfsJobRunnerMain.class.getSimpleName(), _params));
    TeradataImportTool.main(_params.toTdchParams());
}