Usage examples for org.apache.hadoop.fs.FileSystem#exists(Path)
public boolean exists(Path f) throws IOException
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/** * prepareInputData/*from w w w .ja va2 s . c om*/ * */ public static void prepareInputData(Configuration conf, FileSystem fs, Path in, Path centerIn, int numBspTask, int numGPUBspTask, long n, int k, int vectorDimension, Random rand, int GPUPercentage) throws IOException { // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true); } if (fs.exists(centerIn)) { fs.delete(centerIn, true); } final NullWritable nullValue = NullWritable.get(); final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn, PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); // Compute work distributions int cpuTaskNum = numBspTask - numGPUBspTask; long inputVectorsPerGPUTask = 0; long inputVectorsPerCPU = 0; long inputVectorsPerCPUTask = 0; if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) { inputVectorsPerGPUTask = (n * GPUPercentage) / 100; inputVectorsPerCPU = n - inputVectorsPerGPUTask; } else { inputVectorsPerCPU = n; } if (cpuTaskNum > 0) { inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum; } // long interval = totalNumberOfPoints / numBspTask; long centers = 0; for (int part = 0; part < numBspTask; part++) { Path partIn = new Path(in, "part" + part + ".seq"); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); long interval = 0; if (part > cpuTaskNum) { interval = inputVectorsPerGPUTask; } else { interval = inputVectorsPerCPUTask; } long start = interval * part; long end = start + interval - 1; if ((numBspTask - 1) == part) { end = n; // set to totalNumberOfPoints } LOG.info("Partition " + part + ": from " + start + " to " + end); for (long i = start; i <= end; i++) { double[] arr = new double[vectorDimension]; for (int j = 0; j < vectorDimension; j++) { if (rand != null) { arr[j] = rand.nextInt((int) n); } else { arr[j] = i; } } PipesVectorWritable vector = new PipesVectorWritable(new 
DenseDoubleVector(arr)); // LOG.info("input[" + i + "]: " + Arrays.toString(arr)); dataWriter.append(vector, nullValue); if (k > centers) { // LOG.info("center[" + i + "]: " + Arrays.toString(arr)); centerWriter.append(vector, nullValue); centers++; } else { centerWriter.close(); } } dataWriter.close(); } }
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/** * Create testExample vectors and centers as input from * http://www.maplesoft.com/support/help/Maple/view.aspx?path=NAG/g03efc * /*w w w . jav a2 s . c o m*/ * n := 20: vectorDimension := 5: k := 3: maxIterations := 10: * * x := Matrix([ [77.3, 13, 9.699999999999999, 1.5, 6.4], [82.5, 10, 7.5, 1.5, * 6.5], [66.90000000000001, 20.6, 12.5, 2.3, 7], [47.2, 33.8, 19, 2.8, 5.8], * [65.3, 20.5, 14.2, 1.9, 6.9], [83.3, 10, 6.7, 2.2, 7], [81.59999999999999, * 12.7, 5.7, 2.9, 6.7], [47.8, 36.5, 15.7, 2.3, 7.2], [48.6, 37.1, 14.3, 2.1, * 7.2], [61.6, 25.5, 12.9, 1.9, 7.3], [58.6, 26.5, 14.9, 2.4, 6.7], [69.3, * 22.3, 8.4, 4, 7], [61.8, 30.8, 7.4, 2.7, 6.4], [67.7, 25.3, 7, 4.8, 7.3], * [57.2, 31.2, 11.6, 2.4, 6.5], [67.2, 22.7, 10.1, 3.3, 6.2], [59.2, 31.2, * 9.6, 2.4, 6], [80.2, 13.2, 6.6, 2, 5.8], [82.2, 11.1, 6.7, 2.2, 7.2], * [69.7, 20.7, 9.6, 3.1, 5.9]], datatype=float[8], order='C_order'): * * cmeans := Matrix( [[82.5, 10, 7.5, 1.5, 6.5], [47.8, 36.5, 15.7, 2.3, 7.2], * [67.2, 22.7, 10.1, 3.3, 6.2]], datatype=float[8], order='C_order'): * * * Results * * cmeans := Matrix([ [81.1833333333333371, 11.6666666666666661, * 7.1499999999999947, 2.0500000000000027, 6.6000000000000052], * [47.8666666666666671, 35.8000000000000043, 16.3333333333333321, * 2.3999999999999992, 6.7333333333333340], [64.0454545454545610, * 25.2090909090909037, 10.7454545454545425, 2.83636363636363642, * 6.65454545454545521]]): * * inc := Vector([0, 0, 2, 1, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, * 2]): * * nic := Vector([6, 3, 11]): * * css := Vector([46.5716666666666583, 20.3800000000000097, * 468.896363636363503]): * */ public static void prepareTestInput(Configuration conf, FileSystem fs, Path in, Path centerIn) throws IOException { // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true); } if (fs.exists(centerIn)) { fs.delete(centerIn, true); } double[][] input = { { 77.3, 13, 9.699999999999999, 1.5, 6.4 }, { 82.5, 10, 7.5, 1.5, 6.5 }, { 66.90000000000001, 20.6, 
12.5, 2.3, 7 }, { 47.2, 33.8, 19, 2.8, 5.8 }, { 65.3, 20.5, 14.2, 1.9, 6.9 }, { 83.3, 10, 6.7, 2.2, 7 }, { 81.59999999999999, 12.7, 5.7, 2.9, 6.7 }, { 47.8, 36.5, 15.7, 2.3, 7.2 }, { 48.6, 37.1, 14.3, 2.1, 7.2 }, { 61.6, 25.5, 12.9, 1.9, 7.3 }, { 58.6, 26.5, 14.9, 2.4, 6.7 }, { 69.3, 22.3, 8.4, 4, 7 }, { 61.8, 30.8, 7.4, 2.7, 6.4 }, { 67.7, 25.3, 7, 4.8, 7.3 }, { 57.2, 31.2, 11.6, 2.4, 6.5 }, { 67.2, 22.7, 10.1, 3.3, 6.2 }, { 59.2, 31.2, 9.6, 2.4, 6 }, { 80.2, 13.2, 6.6, 2, 5.8 }, { 82.2, 11.1, 6.7, 2.2, 7.2 }, { 69.7, 20.7, 9.6, 3.1, 5.9 } }; double[][] centers = { { 82.5, 10, 7.5, 1.5, 6.5 }, { 47.8, 36.5, 15.7, 2.3, 7.2 }, { 67.2, 22.7, 10.1, 3.3, 6.2 } }; final NullWritable nullValue = NullWritable.get(); // Write inputs LOG.info("inputs: "); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); for (int i = 0; i < input.length; i++) { dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(input[i])), nullValue); LOG.info("input[" + i + "]: " + Arrays.toString(input[i])); } dataWriter.close(); // Write centers LOG.info("centers: "); final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn, PipesVectorWritable.class, NullWritable.class, CompressionType.NONE); for (int i = 0; i < centers.length; i++) { centerWriter.append(new PipesVectorWritable(new DenseDoubleVector(centers[i])), nullValue); LOG.info("center[" + i + "]: " + Arrays.toString(centers[i])); } centerWriter.close(); }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCF.java
License:Apache License
@Override public boolean load(String path, boolean lazy) { this.m_isLazyLoadModel = lazy; this.m_modelPath = path; if (lazy == false) { Path dataPath = new Path(m_modelPath); Configuration conf = new Configuration(); try {//from w w w .j a v a 2 s .c o m FileSystem fs = dataPath.getFileSystem(conf); LinkedList<Path> files = new LinkedList<Path>(); if (!fs.exists(dataPath)) { this.m_isLazyLoadModel = false; this.m_modelPath = null; return false; } if (!fs.isFile(dataPath)) { for (int i = 0; i < 100000; i++) { Path partFile = new Path( m_modelPath + "/part-" + String.valueOf(100000 + i).substring(1, 6)); if (fs.exists(partFile)) { files.add(partFile); } else { break; } } } else { files.add(dataPath); } LOG.info("loading model from " + path); for (Path file : files) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf); Text key = new Text(); PipesVectorWritable value = new PipesVectorWritable(); String strKey = null; Long actualKey = null; String firstSymbol = null; while (reader.next(key, value) != false) { strKey = key.toString(); firstSymbol = strKey.substring(0, 1); try { actualKey = Long.valueOf(strKey.substring(1)); } catch (Exception e) { actualKey = new Long(0); } if (firstSymbol.equals(OnlineCF.DFLT_MODEL_ITEM_DELIM)) { // LOG.info("loaded itemId: " + actualKey + " itemVector: " // + value.getVector()); m_modelItemFactorizedValues.put(actualKey, new PipesVectorWritable(value)); } else if (firstSymbol.equals(OnlineCF.DFLT_MODEL_USER_DELIM)) { // LOG.info("loaded userId: " + actualKey + " userVector: " // + value.getVector()); m_modelUserFactorizedValues.put(actualKey, new PipesVectorWritable(value)); } else { // unknown continue; } } reader.close(); } LOG.info("loaded: " + m_modelUserFactorizedValues.size() + " users, " + m_modelItemFactorizedValues.size() + " items"); // for (Long user : m_modelUserFactorizedValues.keySet()) { // LOG.info("userId: " + user + " userVector: " // + m_modelUserFactorizedValues.get(user)); // } // for (Long item 
: m_modelItemFactorizedValues.keySet()) { // LOG.info("itemId: " + item + " itemVector: " // + m_modelItemFactorizedValues.get(item)); // } } catch (Exception e) { e.printStackTrace(); this.m_isLazyLoadModel = false; this.m_modelPath = null; return false; } } return true; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java
License:Apache License
public static List<double[]> generateRandomInputData(Configuration conf, FileSystem fs, Path in, int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues, int GPUPercentage, int maxTestPrefs) throws IOException { // Delete input directory if already exist if (fs.exists(in)) { fs.delete(in, true);//from www. j a va 2 s . c o m } Random rand = new Random(32L); Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>(); List<double[]> testItems = new ArrayList<double[]>(); int possibleUserItemRatings = userCount * itemCount; int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100; System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings + " ratings: " + userItemRatings); // Compute work distributions int cpuTaskNum = numBspTask - numGPUBspTask; long ratingsPerGPUTask = 0; long ratingsPerCPU = 0; long ratingsPerCPUTask = 0; if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) { ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100; ratingsPerCPU = userItemRatings - ratingsPerGPUTask; } else { ratingsPerCPU = userItemRatings; } if (cpuTaskNum > 0) { ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum; } System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask + " ratingsPerCPU: " + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask); for (int part = 0; part < numBspTask; part++) { Path partIn = new Path(in, "part" + part + ".seq"); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); long interval = 0; if (part > cpuTaskNum) { interval = ratingsPerGPUTask; } else { interval = ratingsPerCPUTask; } long start = interval * part; long end = start + interval - 1; if ((numBspTask - 1) == part) { end = userItemRatings; } System.out.println("Partition " + part + ": from " + start + " to " + end); for (long i = start; i 
<= end; i++) { // Find new user item rating which was not used before Map.Entry<Long, Long> userItemPair; do { long userId = rand.nextInt(userCount); long itemId = rand.nextInt(itemCount); userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId); } while (userItemPairs.contains(userItemPair)); // Add user item rating userItemPairs.add(userItemPair); // Generate rating int rating = rand.nextInt(5) + 1; // values between 1 and 5 // Add user item rating to test data if (i < maxTestPrefs) { testItems.add(new double[] { userItemPair.getKey(), userItemPair.getValue(), rating }); } // Write out user item rating dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable( new DenseDoubleVector(new double[] { userItemPair.getValue(), rating }))); } dataWriter.close(); } return testItems; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java
License:Apache License
public static List<double[]> convertInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException { List<double[]> testItems = new ArrayList<double[]>(); // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true);/*from ww w . j ava 2s . c om*/ } if (fs.exists(preferencesIn)) { fs.delete(preferencesIn, true); } final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); BufferedReader br = new BufferedReader(new FileReader(inputFile)); String line; while ((line = br.readLine()) != null) { String[] values = line.split(separator); long userId = Long.parseLong(values[0]); long itemId = Long.parseLong(values[1]); double rating = Double.parseDouble(values[2]); // System.out.println("userId: " + userId + " itemId: " + itemId // + " rating: " + rating); double vector[] = new double[2]; vector[0] = itemId; vector[1] = rating; prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector))); // Add test preferences maxTestPrefs--; if (maxTestPrefs > 0) { testItems.add(new double[] { userId, itemId, rating }); } } br.close(); prefWriter.close(); return testItems; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> prepareTestInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn) throws IOException { Preference[] train_prefs = { new Preference<Integer, Integer>(1, 0, 4), new Preference<Integer, Integer>(1, 1, 2.5), new Preference<Integer, Integer>(1, 2, 3.5), new Preference<Integer, Integer>(2, 0, 4), new Preference<Integer, Integer>(2, 1, 2.5), new Preference<Integer, Integer>(2, 2, 3.5), new Preference<Integer, Integer>(2, 3, 1), new Preference<Integer, Integer>(2, 4, 3.5), new Preference<Integer, Integer>(3, 0, 4), new Preference<Integer, Integer>(3, 1, 2.5), new Preference<Integer, Integer>(3, 2, 3.5), new Preference<Integer, Integer>(3, 3, 1), new Preference<Integer, Integer>(3, 4, 3.5) }; List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>(); test_prefs.add(new Preference<Long, Long>(1l, 0l, 4)); test_prefs.add(new Preference<Long, Long>(1l, 1l, 2.5)); test_prefs.add(new Preference<Long, Long>(1l, 2l, 3.5)); test_prefs.add(new Preference<Long, Long>(1l, 3l, 1)); test_prefs.add(new Preference<Long, Long>(1l, 4l, 3.5)); // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true);/*from www .ja v a2s . c om*/ } if (fs.exists(preferencesIn)) { fs.delete(preferencesIn, true); } final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); for (Preference<Integer, Integer> taste : train_prefs) { double values[] = new double[2]; values[0] = taste.getItemId(); values[1] = taste.getValue().get(); prefWriter.append(new LongWritable(taste.getUserId()), new PipesVectorWritable(new DenseDoubleVector(values))); } prefWriter.close(); return test_prefs; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> generateRandomInputData(Configuration conf, FileSystem fs, Path in, int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues, int GPUPercentage, int maxTestPrefs) throws IOException { // Delete input directory if already exist if (fs.exists(in)) { fs.delete(in, true);/* w ww .ja v a2 s. c o m*/ } Random rand = new Random(32L); Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>(); List<Preference<Long, Long>> testItems = new ArrayList<Preference<Long, Long>>(); int possibleUserItemRatings = userCount * itemCount; int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100; System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings + " ratings: " + userItemRatings); // Compute work distributions int cpuTaskNum = numBspTask - numGPUBspTask; long ratingsPerGPUTask = 0; long ratingsPerCPU = 0; long ratingsPerCPUTask = 0; if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) { ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100; ratingsPerCPU = userItemRatings - ratingsPerGPUTask; } else { ratingsPerCPU = userItemRatings; } if (cpuTaskNum > 0) { ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum; } System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask + " ratingsPerCPU: " + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask); for (int part = 0; part < numBspTask; part++) { Path partIn = new Path(in, "part" + part + ".seq"); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); long interval = 0; if (part > cpuTaskNum) { interval = ratingsPerGPUTask; } else { interval = ratingsPerCPUTask; } long start = interval * part; long end = start + interval - 1; if ((numBspTask - 1) == part) { end = userItemRatings; } LOG.info("Partition " + part + ": from " + start + " to " + 
end); for (long i = start; i <= end; i++) { // Find new user item rating which was not used before Map.Entry<Long, Long> userItemPair; do { long userId = rand.nextInt(userCount); long itemId = rand.nextInt(itemCount); userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId); } while (userItemPairs.contains(userItemPair)); // Add user item rating userItemPairs.add(userItemPair); // Generate rating int rating = rand.nextInt(5) + 1; // values between 1 and 5 // Add user item rating to test data if (i < maxTestPrefs) { testItems.add( new Preference<Long, Long>(userItemPair.getKey(), userItemPair.getValue(), rating)); } // Write out user item rating dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable( new DenseDoubleVector(new double[] { userItemPair.getValue(), rating }))); } dataWriter.close(); } return testItems; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> convertInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException { List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>(); // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true);//from w w w . ja v a 2 s . c o m } if (fs.exists(preferencesIn)) { fs.delete(preferencesIn, true); } final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); BufferedReader br = new BufferedReader(new FileReader(inputFile)); String line; while ((line = br.readLine()) != null) { String[] values = line.split(separator); long userId = Long.parseLong(values[0]); long itemId = Long.parseLong(values[1]); double rating = Double.parseDouble(values[2]); // System.out.println("userId: " + userId + " itemId: " + itemId // + " rating: " + rating); double vector[] = new double[2]; vector[0] = itemId; vector[1] = rating; prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector))); // Add test preferences maxTestPrefs--; if (maxTestPrefs > 0) { test_prefs.add(new Preference<Long, Long>(userId, itemId, rating)); } } br.close(); prefWriter.close(); return test_prefs; }
From source file:audr.text.utils.FileUtils.java
License:Open Source License
/** * /*from ww w . ja v a 2 s.com*/ * * @param path * @return true false */ public static boolean isDirectoryExist(Path path) { int existFlag = 0; try { Configuration conf = new Configuration(); FileSystem dst = FileSystem.get(conf); if (dst.exists(path)) existFlag = 1; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (existFlag == 1) return true; return false; }
From source file:azkaban.crypto.Decryptions.java
License:Open Source License
/**
 * Decrypts {@code cipheredText} with the passphrase stored in a file.
 *
 * <p>The passphrase is the first line of the file at {@code passphrasePath},
 * decoded with the platform default charset. Before reading, the file must
 * exist, be a regular file, and carry exactly the permission
 * {@code USER_READ_PERMISSION_ONLY}.
 *
 * @param cipheredText   text to decrypt; must not be {@code null}
 * @param passphrasePath location of the passphrase file; must not be {@code null}
 * @param fs             file system holding the passphrase file
 * @return the decrypted text
 * @throws IOException              if the file cannot be inspected or read
 * @throws NullPointerException     if an argument checked above is {@code null}
 *                                  or decryption yields {@code null}
 * @throws IllegalArgumentException if the file is missing, not a file, or
 *                                  has other permissions
 */
public String decrypt(final String cipheredText, final String passphrasePath, final FileSystem fs)
    throws IOException {
  Preconditions.checkNotNull(cipheredText);
  Preconditions.checkNotNull(passphrasePath);

  final Path passphraseFile = new Path(passphrasePath);

  final boolean present = fs.exists(passphraseFile);
  Preconditions.checkArgument(present, "File does not exist at " + passphrasePath);

  final boolean regularFile = fs.isFile(passphraseFile);
  Preconditions.checkArgument(regularFile, "Passphrase path is not a file. " + passphrasePath);

  // Only a file readable by its owner alone is accepted.
  final FileStatus status = fs.getFileStatus(passphraseFile);
  final boolean ownerReadOnly = USER_READ_PERMISSION_ONLY.equals(status.getPermission());
  Preconditions.checkArgument(ownerReadOnly,
      "Passphrase file should only have read only permission on only user. " + passphrasePath);

  final Crypto crypto = new Crypto();
  try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(fs.open(passphraseFile), Charset.defaultCharset()))) {
    final String passphrase = reader.readLine();
    final String plainText = crypto.decrypt(cipheredText, passphrase);
    Preconditions.checkNotNull(plainText, "Was not able to decrypt");
    return plainText;
  }
}