List of usage examples for org.apache.hadoop.io SequenceFile createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec) throws IOException
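A minimal, self-contained sketch of this deprecated stream-based overload (the output path, key/value types, and class name here are illustrative, not taken from the examples below). Note that with this overload the caller owns the FSDataOutputStream: createWriter writes to it but does not create or close it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class SeqFileWriteExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.seq"); // hypothetical output path
        FSDataOutputStream out = fs.create(path); // caller creates the stream
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, Text.class, LongWritable.class,
            CompressionType.NONE, new DefaultCodec()); // codec is unused when compression is NONE
        try {
            writer.append(new Text("key"), new LongWritable(42L));
        } finally {
            writer.close(); // closes the writer only
            out.close(); // caller closes the stream
        }
    }
}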
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
            + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
            .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
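The (fs, conf, path, ...) overload used above is deprecated in Hadoop 2.x in favor of the Writer.Option varargs API. A rough drop-in replacement for the writer creation in the loop above, reusing the same jobConf, file, offset, and size variables (a sketch, assuming Hadoop 2.x or later):

final SequenceFile.Writer writer = SequenceFile.createWriter(jobConf,
    SequenceFile.Writer.file(file),
    SequenceFile.Writer.keyClass(LongWritable.class),
    SequenceFile.Writer.valueClass(LongWritable.class),
    SequenceFile.Writer.compression(CompressionType.NONE));
try {
    writer.append(offset, size);
} finally {
    writer.close();
}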
From source file:Importer.java
License:Open Source License
public static void copyFile(File file) throws Exception {
    // String TEST_PREFIX = "";
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")), conf);

    CompressionCodec codec = new DefaultCodec();
    fileSys.mkdirs(dest.getParent());
    FSDataOutputStream outputStr = fileSys.create(dest);
    seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
        SequenceFile.CompressionType.BLOCK, codec);

    String filename = file.getName();
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    if (filename.endsWith(".bz2")) {
        in.read();
        in.read(); // snarf header
        in = new CBZip2InputStream(in);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

    System.out.println("working on file " + file);
    int records = 0;
    long bytes = 0, bytes_since_status = 0;
    long startTime = System.currentTimeMillis();
    String s = null;
    Text content = new Text();
    while ((s = br.readLine()) != null) {
        if (s.startsWith("---END.OF.DOCUMENT---")) {
            Text name = new Text(hash(content));
            seqFileWriter.append(name, content);
            records++;
            content = new Text();
        } else {
            byte[] line_as_bytes = (s + " ").getBytes();
            for (byte b : line_as_bytes) {
                assert b < 128 : "found an unexpected high-bit set";
            }
            content.append(line_as_bytes, 0, line_as_bytes.length);
            bytes += line_as_bytes.length;
            /*
            bytes_since_status += line_as_bytes.length;
            if (bytes_since_status > 10 * 1024 * 1024) { // every 10 MB
                System.err.print('.');
                bytes_since_status = 0;
            }
            */
        }
    } // end while

    if (content.getLength() > 5) {
        Text name = new Text(hash(content));
        seqFileWriter.append(name, content);
        records++;
    }

    totalBytes += bytes;
    totalRecords += records;
    long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
    long kbSec = bytes / 1024 / time;
    System.out.println(new java.util.Date());
    System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in "
        + time + " seconds (" + kbSec + " KB/sec).");

    in.close();
    seqFileWriter.close();
    outputStr.close();
}
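To verify what copyFile wrote, the sequence file can be scanned back with SequenceFile.Reader. A minimal sketch, reusing fileSys, dest, and conf from the method above and using the older (deprecated) Reader constructor to match the style of these examples:

SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, dest, conf);
try {
    Text key = new Text();
    Text value = new Text();
    // next() returns false at end of file
    while (reader.next(key, value)) {
        System.out.println(key + " -> " + value.getLength() + " bytes");
    }
} finally {
    reader.close();
}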
From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java
License:Apache License
@SuppressWarnings("deprecation") private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes int nrFiles) throws IOException { LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files"); Path controlDir = getControlDir(mConfig); if (!fs.exists(controlDir)) { fs.delete(controlDir, true);/*from w w w . j a v a2 s.c o m*/ for (int i = 0; i < nrFiles; i++) { String name = getFileName(i); Path controlFile = new Path(controlDir, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE); writer.append(new Text(name), new LongWritable(nrBytes)); } catch (Exception e) { throw new IOException(e.getLocalizedMessage()); } finally { if (writer != null) { writer.close(); } writer = null; } } } LOG.info("created control files for: " + nrFiles + " files"); }
From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
        throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class,
            NullWritable.class, CompressionType.NONE);
    }

    // Create example file writer
    SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile,
        IntWritable.class, NullWritable.class, CompressionType.NONE);

    // Write random values to input files and example
    IntWritable inputKey = new IntWritable();
    NullWritable nullValue = NullWritable.get();
    Random r = new Random();
    for (long i = 0; i < n; i++) {
        inputKey.set(r.nextInt(n));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(inputKey, nullValue);
        }
        inputKey.set(r.nextInt(n));
        exampleWriter.append(inputKey, nullValue);
    }

    // Close file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
    exampleWriter.close();
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
private void recalculateAssignmentsAndWrite(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException {
    IntWritable keyWrite = new IntWritable();
    for (DoubleVector v : m_cache) {
        final int lowestDistantCenter = getNearestCenter(v);
        keyWrite.set(lowestDistantCenter);
        peer.write(keyWrite, new PipesVectorWritable(v));
    }

    // just on the first task write the centers to filesystem to prevent
    // collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf),
                m_conf, new Path(pathString), PipesVectorWritable.class, NullWritable.class,
                CompressionType.NONE);
            final NullWritable value = NullWritable.get();
            for (DoubleVector center : m_centers_cpu) {
                dataWriter.append(new PipesVectorWritable(center), value);
            }
            dataWriter.close();
        }
    }
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
@Override
public void bspGpu(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {
    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // Fetch inputs
    final List<DoubleVector> inputs = new ArrayList<DoubleVector>();
    final PipesVectorWritable key = new PipesVectorWritable();
    final NullWritable nullValue = NullWritable.get();
    while (peer.readNext(key, nullValue)) {
        inputs.add(key.getVector());
    }

    // Convert inputs to double[][]
    double[][] inputsArr = new double[inputs.size()][inputs.get(0).getLength()];
    for (int i = 0; i < inputs.size(); i++) {
        double[] vector = inputs.get(i).toArray();
        for (int j = 0; j < vector.length; j++) {
            inputsArr[i][j] = vector[j];
        }
    }

    // Logging
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("KMeansHybrid.bspGpu executed on GPU!\n");
        m_logger.writeChars("KMeansHybrid.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
        m_logger.writeChars("KMeansHybrid.bspGpu inputSize: " + inputs.size() + "\n");
    }

    KMeansHybridKernel kernel = new KMeansHybridKernel(inputsArr, m_centers_gpu,
        m_conf.getInt(CONF_MAX_ITERATIONS, 0), peer.getAllPeerNames());

    // Run GPU Kernels
    Context context = rootbeer.createDefaultContext();
    Stopwatch watch = new Stopwatch();
    watch.start();
    rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
    watch.stop();

    // Output inputs with corresponding new center id
    for (int i = 0; i < inputs.size(); i++) {
        peer.write(new IntWritable(kernel.m_input_centers[i]), new PipesVectorWritable(inputs.get(i)));
    }

    // Output new Centers only on first task
    // to prevent collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf),
                m_conf, new Path(pathString), PipesVectorWritable.class, NullWritable.class,
                CompressionType.NONE);
            for (int i = 0; i < kernel.m_centers.length; i++) {
                dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(kernel.m_centers[i])), nullValue);
            }
            dataWriter.close();
        }
    }

    long stopTime = System.currentTimeMillis();
    if (m_timeMeasurement) {
        LOG.info("# bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");
        if (m_isDebuggingEnabled) {
            // fixed label: the original said "PiEstimatorHybrid", an apparent copy-paste slip
            m_logger.writeChars("KMeansHybrid,bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        }
    }

    // Logging
    if (m_isDebuggingEnabled) {
        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            m_logger.writeChars("  StatsRow:\n");
            m_logger.writeChars("    serial time: " + row.getSerializationTime() + "\n");
            m_logger.writeChars("    exec time: " + row.getExecutionTime() + "\n");
            m_logger.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
            m_logger.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
            m_logger.writeChars("    num threads: " + row.getNumThreads() + "\n");
            m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
        }
        m_logger.close();
    }
}
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/**
 * prepareInputData
 */
public static void prepareInputData(Configuration conf, FileSystem fs, Path in, Path centerIn,
        int numBspTask, int numGPUBspTask, long n, int k, int vectorDimension, Random rand,
        int GPUPercentage) throws IOException {
    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(centerIn)) {
        fs.delete(centerIn, true);
    }

    final NullWritable nullValue = NullWritable.get();
    final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
        PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    long inputVectorsPerGPUTask = 0;
    long inputVectorsPerCPU = 0;
    long inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (n * GPUPercentage) / 100;
        inputVectorsPerCPU = n - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = n;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    // long interval = totalNumberOfPoints / numBspTask;
    long centers = 0;
    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(in, "part" + part + ".seq");
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
            PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

        long interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        long start = interval * part;
        long end = start + interval - 1;
        if ((numBspTask - 1) == part) {
            end = n; // set to totalNumberOfPoints
        }
        LOG.info("Partition " + part + ": from " + start + " to " + end);

        for (long i = start; i <= end; i++) {
            double[] arr = new double[vectorDimension];
            for (int j = 0; j < vectorDimension; j++) {
                if (rand != null) {
                    arr[j] = rand.nextInt((int) n);
                } else {
                    arr[j] = i;
                }
            }
            PipesVectorWritable vector = new PipesVectorWritable(new DenseDoubleVector(arr));
            // LOG.info("input[" + i + "]: " + Arrays.toString(arr));
            dataWriter.append(vector, nullValue);

            if (k > centers) {
                // LOG.info("center[" + i + "]: " + Arrays.toString(arr));
                centerWriter.append(vector, nullValue);
                centers++;
            } else {
                centerWriter.close();
            }
        }
        dataWriter.close();
    }
}
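One quirk in prepareInputData above: centerWriter.close() sits in the else branch of the inner loop, so it is reached on every vector after the first k and relies on repeated close() calls being tolerated. A sketch of an alternative that writes the first k vectors as centers and closes the writer exactly once (the helper name is hypothetical; PipesVectorWritable and DenseDoubleVector are the example project's own types):

static void writeCenters(FileSystem fs, Configuration conf, Path centerIn,
        List<DoubleVector> vectors, int k) throws IOException {
    SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
        PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
    try {
        int centers = 0;
        for (DoubleVector v : vectors) {
            if (centers >= k) {
                break; // stop once k centers have been written
            }
            centerWriter.append(new PipesVectorWritable(v), NullWritable.get());
            centers++;
        }
    } finally {
        centerWriter.close(); // closed exactly once, even on error
    }
}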
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
/**
 * Create testExample vectors and centers as input from
 * http://www.maplesoft.com/support/help/Maple/view.aspx?path=NAG/g03efc
 *
 * n := 20: vectorDimension := 5: k := 3: maxIterations := 10:
 *
 * x := Matrix([
 *   [77.3, 13, 9.699999999999999, 1.5, 6.4], [82.5, 10, 7.5, 1.5, 6.5],
 *   [66.90000000000001, 20.6, 12.5, 2.3, 7], [47.2, 33.8, 19, 2.8, 5.8],
 *   [65.3, 20.5, 14.2, 1.9, 6.9], [83.3, 10, 6.7, 2.2, 7],
 *   [81.59999999999999, 12.7, 5.7, 2.9, 6.7], [47.8, 36.5, 15.7, 2.3, 7.2],
 *   [48.6, 37.1, 14.3, 2.1, 7.2], [61.6, 25.5, 12.9, 1.9, 7.3],
 *   [58.6, 26.5, 14.9, 2.4, 6.7], [69.3, 22.3, 8.4, 4, 7],
 *   [61.8, 30.8, 7.4, 2.7, 6.4], [67.7, 25.3, 7, 4.8, 7.3],
 *   [57.2, 31.2, 11.6, 2.4, 6.5], [67.2, 22.7, 10.1, 3.3, 6.2],
 *   [59.2, 31.2, 9.6, 2.4, 6], [80.2, 13.2, 6.6, 2, 5.8],
 *   [82.2, 11.1, 6.7, 2.2, 7.2], [69.7, 20.7, 9.6, 3.1, 5.9]],
 *   datatype=float[8], order='C_order'):
 *
 * cmeans := Matrix(
 *   [[82.5, 10, 7.5, 1.5, 6.5], [47.8, 36.5, 15.7, 2.3, 7.2],
 *   [67.2, 22.7, 10.1, 3.3, 6.2]], datatype=float[8], order='C_order'):
 *
 * Results
 *
 * cmeans := Matrix([
 *   [81.1833333333333371, 11.6666666666666661, 7.1499999999999947, 2.0500000000000027, 6.6000000000000052],
 *   [47.8666666666666671, 35.8000000000000043, 16.3333333333333321, 2.3999999999999992, 6.7333333333333340],
 *   [64.0454545454545610, 25.2090909090909037, 10.7454545454545425, 2.83636363636363642, 6.65454545454545521]]):
 *
 * inc := Vector([0, 0, 2, 1, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2]):
 *
 * nic := Vector([6, 3, 11]):
 *
 * css := Vector([46.5716666666666583, 20.3800000000000097, 468.896363636363503]):
 */
public static void prepareTestInput(Configuration conf, FileSystem fs, Path in, Path centerIn)
        throws IOException {
    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(centerIn)) {
        fs.delete(centerIn, true);
    }

    double[][] input = { { 77.3, 13, 9.699999999999999, 1.5, 6.4 }, { 82.5, 10, 7.5, 1.5, 6.5 },
        { 66.90000000000001, 20.6, 12.5, 2.3, 7 }, { 47.2, 33.8, 19, 2.8, 5.8 },
        { 65.3, 20.5, 14.2, 1.9, 6.9 }, { 83.3, 10, 6.7, 2.2, 7 },
        { 81.59999999999999, 12.7, 5.7, 2.9, 6.7 }, { 47.8, 36.5, 15.7, 2.3, 7.2 },
        { 48.6, 37.1, 14.3, 2.1, 7.2 }, { 61.6, 25.5, 12.9, 1.9, 7.3 },
        { 58.6, 26.5, 14.9, 2.4, 6.7 }, { 69.3, 22.3, 8.4, 4, 7 },
        { 61.8, 30.8, 7.4, 2.7, 6.4 }, { 67.7, 25.3, 7, 4.8, 7.3 },
        { 57.2, 31.2, 11.6, 2.4, 6.5 }, { 67.2, 22.7, 10.1, 3.3, 6.2 },
        { 59.2, 31.2, 9.6, 2.4, 6 }, { 80.2, 13.2, 6.6, 2, 5.8 },
        { 82.2, 11.1, 6.7, 2.2, 7.2 }, { 69.7, 20.7, 9.6, 3.1, 5.9 } };

    double[][] centers = { { 82.5, 10, 7.5, 1.5, 6.5 }, { 47.8, 36.5, 15.7, 2.3, 7.2 },
        { 67.2, 22.7, 10.1, 3.3, 6.2 } };

    final NullWritable nullValue = NullWritable.get();

    // Write inputs
    LOG.info("inputs: ");
    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in,
        PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
    for (int i = 0; i < input.length; i++) {
        dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(input[i])), nullValue);
        LOG.info("input[" + i + "]: " + Arrays.toString(input[i]));
    }
    dataWriter.close();

    // Write centers
    LOG.info("centers: ");
    final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
        PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
    for (int i = 0; i < centers.length; i++) {
        centerWriter.append(new PipesVectorWritable(new DenseDoubleVector(centers[i])), nullValue);
        LOG.info("center[" + i + "]: " + Arrays.toString(centers[i]));
    }
    centerWriter.close();
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License:Apache License
public static List<Path> writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows,
        int columns, Path path, int numBspTask, int numGPUBspTask, int GPUPercentage) throws IOException {
    List<Path> splittedFiles = new ArrayList<Path>();

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    int inputVectorsPerGPUTask = 0;
    int inputVectorsPerCPU = 0;
    int inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (rows * GPUPercentage) / 100;
        inputVectorsPerCPU = rows - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = rows;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(path, "part" + part + ".seq");
        splittedFiles.add(partIn);
        FileSystem fs = FileSystem.get(conf);
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
            IntWritable.class, VectorWritable.class, CompressionType.NONE);

        int interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        int start = interval * part;
        int end = start + interval;
        if ((numBspTask - 1) == part) {
            end = rows; // set to totalRows
        }
        LOG.info("Partition " + part + " file " + partIn.getParent().getName() + "/" + partIn.getName()
            + " from " + start + " to " + (end - 1));

        for (int i = start; i < end; i++) {
            DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]);
            dataWriter.append(new IntWritable(i), new VectorWritable(rowVector));
        }
        dataWriter.close();
    }
    return splittedFiles;
}
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java
License:Apache License
public static List<double[]> generateRandomInputData(Configuration conf, FileSystem fs, Path in,
        int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues,
        int GPUPercentage, int maxTestPrefs) throws IOException {
    // Delete input directory if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }

    Random rand = new Random(32L);
    Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>();
    List<double[]> testItems = new ArrayList<double[]>();

    int possibleUserItemRatings = userCount * itemCount;
    int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100;
    System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings
        + " ratings: " + userItemRatings);

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    long ratingsPerGPUTask = 0;
    long ratingsPerCPU = 0;
    long ratingsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100;
        ratingsPerCPU = userItemRatings - ratingsPerGPUTask;
    } else {
        ratingsPerCPU = userItemRatings;
    }
    if (cpuTaskNum > 0) {
        ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum;
    }
    System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask
        + " ratingsPerCPU: " + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask);

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(in, "part" + part + ".seq");
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
            LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

        long interval = 0;
        if (part > cpuTaskNum) {
            interval = ratingsPerGPUTask;
        } else {
            interval = ratingsPerCPUTask;
        }
        long start = interval * part;
        long end = start + interval - 1;
        if ((numBspTask - 1) == part) {
            end = userItemRatings;
        }
        System.out.println("Partition " + part + ": from " + start + " to " + end);

        for (long i = start; i <= end; i++) {
            // Find new user item rating which was not used before
            Map.Entry<Long, Long> userItemPair;
            do {
                long userId = rand.nextInt(userCount);
                long itemId = rand.nextInt(itemCount);
                userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId);
            } while (userItemPairs.contains(userItemPair));

            // Add user item rating
            userItemPairs.add(userItemPair);

            // Generate rating
            int rating = rand.nextInt(5) + 1; // values between 1 and 5

            // Add user item rating to test data
            if (i < maxTestPrefs) {
                testItems.add(new double[] { userItemPair.getKey(), userItemPair.getValue(), rating });
            }

            // Write out user item rating
            dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable(
                new DenseDoubleVector(new double[] { userItemPair.getValue(), rating })));
        }
        dataWriter.close();
    }
    return testItems;
}