List of usage examples for org.apache.hadoop.io.SequenceFile#createWriter
@Deprecated public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass) throws IOException
From source file:ComRoughSetApproInputSampler.java
License:Apache License
/** * Write a partition file for the given job, using the Sampler provided. * Queries the sampler for a sample keyset, sorts by the output key * comparator, selects the keys for each rank, and writes to the destination * returned from {@link TotalOrderPartitioner#getPartitionFile}. *//*w w w . j a v a2 s .co m*/ @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = job.getConfiguration(); final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[]) sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf)); FileSystem fs = dst.getFileSystem(conf); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for (int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
From source file:ac.keio.sslab.nlp.lda.RowIdJob.java
License:Apache License
@SuppressWarnings("deprecation") @Override//w w w . ja va 2 s. c om public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path outputPath = getOutputPath(); Path indexPath = new Path(outputPath, "docIndex"); Path matrixPath = new Path(outputPath, "matrix"); try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class, Text.class); SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath, IntWritable.class, VectorWritable.class)) { IntWritable docId = new IntWritable(); int i = 0; int numCols = 0; for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>( getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) { VectorWritable value = record.getSecond(); docId.set(i); indexWriter.append(docId, record.getFirst()); matrixWriter.append(docId, value); i++; numCols = value.get().size(); } log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath); return 0; } }
From source file:bigsatgps.BigDataHandler.java
License:Open Source License
/** * * @param infile/*from ww w . j a v a 2 s. c o m*/ * @return * @throws Exception */ public String ImageToSequence(String infile) throws Exception { String log4jConfPath = "lib/log4j.properties"; PropertyConfigurator.configure(log4jConfPath); confHadoop = new Configuration(); confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/core-site.xml")); confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/hdfs-site.xml")); FileSystem fs = FileSystem.get(confHadoop); Path inPath = new Path(infile); String outfile = infile.substring(0, infile.indexOf(".")) + ".seq"; Path outPath = new Path(outfile); System.out.println(); System.out.println("Successfully created the sequencefile " + outfile); FSDataInputStream in = null; Text key = new Text(); BytesWritable value = new BytesWritable(); SequenceFile.Writer writer = null; try { in = fs.open(inPath); byte buffer[] = new byte[in.available()]; in.read(buffer); writer = SequenceFile.createWriter(fs, confHadoop, outPath, key.getClass(), value.getClass()); writer.append(new Text(inPath.getName()), new BytesWritable(buffer)); IOUtils.closeStream(writer); return outfile; } catch (IOException e) { System.err.println("Exception MESSAGES = " + e.getMessage()); IOUtils.closeStream(writer); return null; } }
From source file:boa.datagen.SeqProjectCombiner.java
License:Apache License
public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); conf.set("fs.default.name", "hdfs://boa-njt/"); FileSystem fileSystem = FileSystem.get(conf); String base = conf.get("fs.default.name", ""); HashMap<String, String> sources = new HashMap<String, String>(); HashSet<String> marks = new HashSet<String>(); FileStatus[] files = fileSystem.listStatus(new Path(base + "tmprepcache/2015-07")); for (int i = 0; i < files.length; i++) { FileStatus file = files[i];//from w w w .ja v a 2s .c om String name = file.getPath().getName(); if (name.startsWith("projects-") && name.endsWith(".seq")) { System.out.println("Reading file " + i + " in " + files.length + ": " + name); SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf); final Text key = new Text(); final BytesWritable value = new BytesWritable(); try { while (r.next(key, value)) { String s = key.toString(); if (marks.contains(s)) continue; Project p = Project .parseFrom(CodedInputStream.newInstance(value.getBytes(), 0, value.getLength())); if (p.getCodeRepositoriesCount() > 0 && p.getCodeRepositories(0).getRevisionsCount() > 0) marks.add(s); sources.put(s, name); } } catch (Exception e) { System.err.println(name); e.printStackTrace(); } r.close(); } } SequenceFile.Writer w = SequenceFile.createWriter(fileSystem, conf, new Path(base + "repcache/2015-07/projects.seq"), Text.class, BytesWritable.class); for (int i = 0; i < files.length; i++) { FileStatus file = files[i]; String name = file.getPath().getName(); if (name.startsWith("projects-") && name.endsWith(".seq")) { System.out.println("Reading file " + i + " in " + files.length + ": " + name); SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf); final Text key = new Text(); final BytesWritable value = new BytesWritable(); try { while (r.next(key, value)) { String s = key.toString(); if (sources.get(s).equals(name)) w.append(key, value); } } catch (Exception e) { 
System.err.println(name); e.printStackTrace(); } r.close(); } } w.close(); fileSystem.close(); }
From source file:boa.datagen.SeqSortMerge.java
License:Apache License
/**
 * Repeatedly merges directories under {@code /tmprepcache/2015-07-sorted/}
 * until fewer than two entries remain.
 *
 * <p>Each round creates a new directory named after the current time in
 * milliseconds, opens a reader on the {@code part-00000} file of every
 * candidate chosen by {@code getCandidates}, and copies records into the new
 * directory's {@code part-00000}, always taking the entry selected by
 * {@code min}. A round stops when the selected key is the empty string. The
 * merged candidate directories are then deleted recursively.
 *
 * @param args unused
 * @throws IOException if any file system, reader or writer operation fails
 */
public static void main(String[] args) throws IOException {
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    String inPath = "/tmprepcache/2015-07-sorted/";

    for (FileStatus[] files = fs.listStatus(new Path(inPath));
            files.length >= 2;
            files = fs.listStatus(new Path(inPath))) {

        Path mergedDir = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(mergedDir);
        Path mergedFile = new Path(inPath + mergedDir.getName() + "/part-00000");
        SequenceFile.Writer out =
                SequenceFile.createWriter(fs, conf, mergedFile, Text.class, BytesWritable.class);

        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);

        int count = candidates.length;
        SequenceFile.Reader[] readers = new SequenceFile.Reader[count];
        for (int slot = 0; slot < count; slot++) {
            Path part = new Path(inPath + candidates[slot].getPath().getName() + "/part-00000");
            readers[slot] = new SequenceFile.Reader(fs, part, conf);
        }

        Text[] keys = new Text[count];
        BytesWritable[] values = new BytesWritable[count];
        read(readers, keys, values);

        // Emit the entry picked by min(), then refill that slot from its reader.
        int pick = min(keys);
        while (!keys[pick].toString().isEmpty()) {
            out.append(keys[pick], values[pick]);
            read(readers[pick], keys[pick], values[pick]);
            pick = min(keys);
        }

        for (SequenceFile.Reader reader : readers) {
            reader.close();
        }
        out.close();

        for (FileStatus merged : candidates) {
            fs.delete(new Path(inPath + merged.getPath().getName()), true);
        }
    }
}
From source file:cn.com.warlock.SequenceFilesTest.java
License:Apache License
public static void main(String[] args) throws IOException { String hdfsUri = "hdfs://hlg-2p238-fandongsheng:8020"; String pathStr = "/tmp/example/seq1"; String compressType = "1"; // ??windows? // System.setProperty("hadoop.home.dir", "E:\\tools"); Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsUri); Path path = new Path(pathStr); IntWritable key = new IntWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try {/*w w w. jav a 2 s. c o m*/ SequenceFile.Writer.Option pathOpt = SequenceFile.Writer.file(path); SequenceFile.Writer.Option keyClassOpt = SequenceFile.Writer.keyClass(key.getClass()); SequenceFile.Writer.Option valueClassOpt = SequenceFile.Writer.valueClass(value.getClass()); SequenceFile.Writer.Option compressionOpt = null; // compress type if (compressType.equals("1")) { System.out.println("compress none"); compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE); } else if (compressType.equals("2")) { System.out.println("compress record"); compressionOpt = SequenceFile.Writer.compression(CompressionType.RECORD); } else if (compressType.equals("3")) { System.out.println("compress block"); compressionOpt = SequenceFile.Writer.compression(CompressionType.BLOCK); } else { System.out.println("Default : compress none"); compressionOpt = SequenceFile.Writer.compression(CompressionType.NONE); } writer = SequenceFile.createWriter(conf, pathOpt, keyClassOpt, valueClassOpt, compressionOpt); for (int i = 0; i < 100; i++) { key.set(100 - i); value.set(DATA[i % DATA.length]); System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); writer.append(key, value); } } finally { IOUtils.closeStream(writer); } }
From source file:co.nubetech.hiho.common.HihoTestCase.java
License:Apache License
/**
 * Writes every entry of {@code inputData} to a new SequenceFile at
 * {@code filePath/nameOfFile}.
 *
 * <p>The key and value classes of the file are taken from the runtime classes
 * of the first entry returned by the map's iterator. When the map is
 * {@code null} or empty there is no entry to take the classes from, so
 * nothing is written.
 *
 * <p>Failures now propagate to the caller. The original printed the stack
 * trace and returned normally, which left callers unaware that the fixture
 * file was missing or incomplete.
 *
 * @param inputData  entries to write; keys and values must be of types the
 *                   SequenceFile writer can serialize
 * @param filePath   directory that will contain the file
 * @param nameOfFile name of the file inside {@code filePath}
 * @throws Exception if the writer cannot be created, an append fails, or the
 *                   file cannot be closed
 */
public void createSequenceFileInHdfs(HashMap inputData, String filePath, String nameOfFile) throws Exception {
    if (inputData == null || inputData.isEmpty()) {
        logger.debug("no entries to write, skipping " + filePath + "/" + nameOfFile);
        return;
    }

    Configuration conf = new Configuration();
    FileSystem fs = getFileSystem();
    Path inputFile = new Path(filePath + "/" + nameOfFile);

    Object firstKey = inputData.keySet().iterator().next();
    Class<?> keyClass = firstKey.getClass();
    Class<?> valueClass = inputData.get(firstKey).getClass();

    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, inputFile, keyClass, valueClass);
        logger.debug("key class is: " + keyClass);
        logger.debug("val class is: " + valueClass);
        // Walk the entries directly instead of pairing two separate iterators.
        for (Object item : inputData.entrySet()) {
            java.util.Map.Entry<?, ?> entry = (java.util.Map.Entry<?, ?>) item;
            writer.append(entry.getKey(), entry.getValue());
        }
        // Close explicitly so a failed flush is reported rather than swallowed.
        writer.close();
        writer = null;
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:co.nubetech.hiho.testdata.SequenceFileForCustomObject.java
License:Apache License
public static void main(String[] args) throws IOException { String uri = "inputnew.seq"; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); // IntWritable key = new IntWritable(); // Student value[] = new Student[10]; SequenceFile.Writer writer = null; Student student = new Student(); try {//from w w w . ja v a2 s . co m writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Student.class); for (int i = 0; i < 10; i++) { student.setId(id[i]); student.setName(name[i]); student.setAddress(address[i]); student.setMobileNumber(mobileNo[i]); student.setPercentage(percentage[i]); // value[i]=student; // System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, // value); writer.append(student.getId(), student); } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(writer); } }
From source file:co.nubetech.hiho.testdata.SequenceFileWriteDemo.java
License:Apache License
public static void main(String[] args) throws IOException { String uri = "input2.seq"; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); IntWritable key = new IntWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try {//from w w w . j a v a2 s .c o m writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); for (int i = 0; i < 2; i++) { key.set(2 - i); value.set(DATA[i % DATA.length]); System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); writer.append(key, value); } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(writer); } }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/** * Write out a SequenceFile that can be read by TotalOrderPartitioner * that contains the split points in startKeys. * @param partitionsPath output path for SequenceFile * @param startKeys the region start keys */// www . j a v a 2 s .co m private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException { if (startKeys.isEmpty()) { throw new IllegalArgumentException("No regions passed"); } // We're generating a list of split points, and we don't ever // have keys < the first region (which has an empty start key) // so we need to remove it. Otherwise we would end up with an // empty reducer with index 0 TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys); ImmutableBytesWritable first = sorted.first(); if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) { throw new IllegalArgumentException("First region of table should have empty start key. Instead has: " + Bytes.toStringBinary(first.get())); } sorted.remove(first); // Write the actual file FileSystem fs = partitionsPath.getFileSystem(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class); try { for (ImmutableBytesWritable startKey : sorted) { writer.append(startKey, NullWritable.get()); } } finally { writer.close(); } }