List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
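Sets the value of this IntWritable. Because Hadoop Writables are mutable containers, a single instance is typically allocated once and refreshed with set(int) for each record, avoiding per-record allocations. A minimal standalone sketch of the pattern (class and variable names here are illustrative, not taken from the examples below):

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetSketch {
    public static void main(String[] args) {
        // Allocate one container and reuse it; set(int) replaces the held value.
        IntWritable value = new IntWritable(0);
        for (int i = 1; i <= 3; i++) {
            value.set(i);                    // overwrite in place; no new object
            System.out.println(value.get()); // prints 1, 2, 3
        }
    }
}

This reuse pattern is why the examples below repeatedly call set(...) on the same mask, wvalue, sidkey, or exValue instance instead of constructing a new IntWritable each time.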
From source file: org.shadowmask.engine.hive.udf.UDFMobileTest.java
License: Apache License
@Test
public void testUDFMobile() {
    UDFMobile udfMobile = new UDFMobile();
    Text mobile = new Text("13566668888");
    IntWritable mask = new IntWritable(0);
    Text result = udfMobile.evaluate(mobile, mask);
    assertEquals("13566668888", result.toString());
    mask.set(1);
    result = udfMobile.evaluate(mobile, mask);
    assertEquals("1356666****", result.toString());
    mask.set(2);
    result = udfMobile.evaluate(mobile, mask);
    assertEquals("135********", result.toString());
    mask.set(3);
    result = udfMobile.evaluate(mobile, mask);
    assertEquals("***********", result.toString());
    mobile = null;
    result = udfMobile.evaluate(mobile, mask);
    assertNull(result);
}
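The assertions above pin down the masking ladder: level 0 leaves the number intact, level 1 hides the last four digits, level 2 keeps only the three-digit prefix, and level 3 hides everything. A hypothetical re-implementation of that rule for 11-digit numbers, inferred from the assertions (this is a sketch, not the actual ShadowMask UDFMobile code):

// Hypothetical helper reproducing the behaviour the test asserts.
static String maskMobile(String mobile, int mask) {
    if (mobile == null) {
        return null; // matches the null-input behaviour asserted above
    }
    switch (mask) {
    case 0:
        return mobile;                              // no masking
    case 1:
        return mobile.substring(0, 7) + "****";     // hide the last 4 digits
    case 2:
        return mobile.substring(0, 3) + "********"; // keep the 3-digit prefix
    default:
        return "***********";                       // hide all 11 digits
    }
}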
From source file: org.shadowmask.engine.hive.udf.UDFTimestampTest.java
License: Apache License
@Test
public void testUDFTimestamp() {
    UDFTimestamp udfTimestamp = new UDFTimestamp();
    long time = DateTime.parse("2016-09-18T10:30:32.222").getMillis();
    TimestampWritable timestamp = new TimestampWritable();
    timestamp.setTime(time);
    IntWritable mask = new IntWritable(0);
    TimestampWritable result = udfTimestamp.evaluate(timestamp, mask);
    assertEquals(time, result.getTimestamp().getTime());
    mask.set(1);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect1 = DateTime.parse("2016-09-18T10:30:32.000").getMillis();
    assertEquals(expect1, result.getTimestamp().getTime());
    mask.set(2);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect2 = DateTime.parse("2016-09-18T10:30:00.000").getMillis();
    assertEquals(expect2, result.getTimestamp().getTime());
    mask.set(3);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect3 = DateTime.parse("2016-09-18T10:00:00.000").getMillis();
    assertEquals(expect3, result.getTimestamp().getTime());
    mask.set(4);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect4 = DateTime.parse("2016-09-18T00:00:00.000").getMillis();
    assertEquals(expect4, result.getTimestamp().getTime());
    mask.set(5);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect5 = DateTime.parse("2016-09-01T00:00:00.000").getMillis();
    assertEquals(expect5, result.getTimestamp().getTime());
    mask.set(6);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect6 = DateTime.parse("2016-01-01T00:00:00.000").getMillis();
    assertEquals(expect6, result.getTimestamp().getTime());
    mask.set(7);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect7 = DateTime.parse("1901-01-01T00:00:00.000").getMillis();
    assertEquals(expect7, result.getTimestamp().getTime());
    timestamp = null;
    result = udfTimestamp.evaluate(timestamp, mask);
    assertNull(result);
}
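Each mask level generalizes the timestamp one step further: 1 drops milliseconds, 2 drops seconds, 3 drops minutes, 4 drops the time of day, 5 the day of month, 6 the month, and 7 collapses everything to 1901-01-01. A hypothetical sketch of that truncation ladder in Joda-Time (which the test already uses); this is inferred from the expected values, not the actual UDFTimestamp source:

// Hypothetical helper reproducing the truncation the test asserts (org.joda.time.DateTime).
static DateTime truncate(DateTime t, int mask) {
    switch (mask) {
    case 0: return t;
    case 1: return t.withMillisOfSecond(0);                       // second precision
    case 2: return t.withSecondOfMinute(0).withMillisOfSecond(0); // minute precision
    case 3: return t.withTime(t.getHourOfDay(), 0, 0, 0);         // hour precision
    case 4: return t.withTimeAtStartOfDay();                      // day precision
    case 5: return t.withDayOfMonth(1).withTimeAtStartOfDay();    // month precision
    case 6: return t.withDayOfYear(1).withTimeAtStartOfDay();     // year precision
    default: return new DateTime(1901, 1, 1, 0, 0);               // fully suppressed
    }
}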
From source file: org.swjtu.helloworldcn.APCDriver.java
License: Apache License
/**
 * @param conf
 *            the Configuration to be used
 * @param inputPath
 *            the Path to the input tuples directory
 * @param outputCalc
 *            the Path to the output directory
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void parallelUpdateRA(Configuration conf, Path inputPath, Path outputCalc)
        throws IOException, ClassNotFoundException, InterruptedException {
    Boolean dn = false;
    FileSystem fs = FileSystem.get(conf);
    int i = -1;
    Vector diagAplusR = null;
    while (!dn) {
        i = i + 1;
        Path outputPath = new Path(outputCalc, "parallelUpdateRA-" + i);
        if (i > 1) {
            HadoopUtil.delete(conf, new Path(outputCalc, "parallelUpdateRA-" + (i - 2)));
        }
        inputPath = APCParallelUpdateRAJob.runJob(inputPath, outputPath, numDims, lamda);
        diagAplusR = APCGetDiagAplusRJob.runJob(inputPath, numDims);
        // Build an indicator vector E: 1.0 where diag(A)+R is positive, else 0.0
        RandomAccessSparseVector E = new RandomAccessSparseVector(numDims, 100);
        Iterator<Element> iter = diagAplusR.iterateNonZero();
        int K = 0;
        while (iter.hasNext()) {
            Element element = iter.next();
            if (element.get() > 0) {
                E.set(element.index(), 1.0);
                K++;
            } else {
                E.set(element.index(), 0.0);
            }
        }
        // Keep a sliding window of the last convits indicator vectors
        int index = i % convits;
        if (i >= convits) {
            e.remove(index);
        }
        e.add(index, E);
        if (i >= convits || i >= maxIterations) {
            RandomAccessSparseVector se = new RandomAccessSparseVector(numDims, 100);
            Iterator<RandomAccessSparseVector> iterator = e.iterator();
            while (iterator.hasNext()) {
                RandomAccessSparseVector v = iterator.next();
                se = (RandomAccessSparseVector) se.plus(v);
            }
            // Converged if every exemplar decision was stable for convits iterations
            iter = se.iterateNonZero();
            while (iter.hasNext()) {
                Element element = iter.next();
                if (element.get() != convits) {
                    unconverged = true;
                    break;
                } else {
                    unconverged = false;
                }
            }
            if ((!unconverged && K > 0) || i == maxIterations) {
                dn = true;
            }
        }
    }
    // Get exemplars
    exemplars = new ArrayList<Integer>();
    Path outputExemplasPath = new Path(new Path(outputCalc, "exemplars"), "result");
    IntWritable exValue = new IntWritable();
    SequenceFile.Writer writerExemplas = SequenceFile.createWriter(fs, conf, outputExemplasPath,
            NullWritable.get().getClass(), exValue.getClass());
    Iterator<Element> iter = diagAplusR.iterateNonZero();
    while (iter.hasNext()) {
        Element element = iter.next();
        if (element.get() > 0) {
            exemplars.add(element.index());
            exValue.set(element.index());
            writerExemplas.append(NullWritable.get(), exValue);
        }
    }
    writerExemplas.close();
    clusteringResult = APCGetClusteringResultJob.runJob(inputPath, outputExemplasPath, numDims);
}
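The exemplars file written above stores (NullWritable, IntWritable) pairs, with exValue.set(...) refreshing a single value instance before each append. Reading it back follows the mirror-image pattern; a minimal sketch, with the path and configuration assumed rather than taken from APCDriver:

// Minimal sketch: read back (NullWritable, IntWritable) pairs such as the
// exemplars file written above. The path below is illustrative.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("outputCalc/exemplars/result");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
try {
    NullWritable key = NullWritable.get();
    IntWritable value = new IntWritable(); // reused for every record
    while (reader.next(key, value)) {
        System.out.println(value.get());   // one exemplar index per record
    }
} finally {
    reader.close();
}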
From source file: org.terrier.structures.collections.TestFSOrderedMapFile.java
License: Mozilla Public License
@Before
public void setUp() throws Exception {
    file = tf.newFile("testFSOMapfile" + FSOrderedMapFile.USUAL_EXTENSION).toString();
    MapFileWriter w = FSOrderedMapFile.mapFileWrite(file);
    FixedSizeWriteableFactory<Text> keyFactory = new FixedSizeTextFactory(20);
    int offset = 0;
    for (String key : testKeys) {
        Text wkey = keyFactory.newInstance();
        IntWritable wvalue = new IntWritable();
        wkey.set(key);
        wvalue.set(offset);
        w.write(wkey, wvalue);
        key2id.put(key, offset);
        offset++;
    }
    w.close();
}
From source file: org.terrier.structures.collections.TestFSOrderedMapFile.java
License: Mozilla Public License
@Test
public void testDupSuppres() throws Exception {
    String file = tf.newFile("testDupSuppres.fsomapfile").toString();
    FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
    FixedSizeWriteableFactory<IntWritable> valueFactory = new FixedSizeIntWritableFactory();
    MapFileWriter m = new FSOrderedMapFile.MultiFSOMapWriter(file, 2, keyFactory, valueFactory, true);
    Text k;
    IntWritable v;
    k = keyFactory.newInstance();
    k.set("a");
    v = valueFactory.newInstance();
    v.set(0);
    m.write(k, v);
    k = keyFactory.newInstance();
    k.set("b");
    v = valueFactory.newInstance();
    v.set(1);
    m.write(k, v);
    ((Flushable) m).flush();
    k = keyFactory.newInstance();
    k.set("a");
    v = valueFactory.newInstance();
    v.set(2);
    m.write(k, v);
    k = keyFactory.newInstance();
    k.set("c");
    v = valueFactory.newInstance();
    v.set(3);
    m.write(k, v);
    m.close();
    FSOrderedMapFile<Text, IntWritable> map = new FSOrderedMapFile<Text, IntWritable>(file, false,
            keyFactory, valueFactory);
    assertEquals(3, map.size());
    k = keyFactory.newInstance();
    k.set("a");
    assertEquals(0, map.get(k).get()); // first write of "a" wins; the duplicate is suppressed
    k = keyFactory.newInstance();
    k.set("b");
    assertEquals(1, map.get(k).get());
    k = keyFactory.newInstance();
    k.set("c");
    assertEquals(3, map.get(k).get());
}
From source file: org.terrier.structures.collections.TestFSOrderedMapFile.java
License: Mozilla Public License
@Test
public void testInMemoryJDKCollection() throws Exception {
    FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
    SortedMap<Text, IntWritable> map = new TreeMap<Text, IntWritable>();
    int offset = 0;
    for (String key : testKeys) {
        Text wkey = keyFactory.newInstance();
        IntWritable wvalue = new IntWritable();
        wkey.set(key);
        wvalue.set(offset);
        map.put(wkey, wvalue);
        offset++;
    }
    checkKeys(keyFactory, map);
}
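IntWritable implements WritableComparable, which is what lets it live in a TreeMap and be compared by value, as shown above. Because set(int) mutates the instance, comparison and equality always reflect the current value; a short sketch:

IntWritable a = new IntWritable(1);
IntWritable b = new IntWritable(2);
System.out.println(a.compareTo(b)); // negative: 1 < 2
b.set(1);                           // mutate b in place
System.out.println(a.equals(b));    // true: equality tracks the current value

For the same reason, keys placed in a sorted collection should not be mutated afterwards; note how the test above creates a fresh Text and IntWritable per entry rather than reusing one instance.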
From source file: svm_scale.Svm_scale.java
License: Apache License
/**
 * Writes out the files for S and R in space-separated-values format.
 * S is the entity table (the outer table for the join);
 * the schema of S is (SID Target ForeignKey XS).
 * R is the attribute table (the inner table for the join);
 * the schema of R is (RID XR).
 * nS is the number of tuples in S; nR is the number of tuples in R.
 * dS is the number of features in S (including the target); dR is the number of features in R.
 * Variance is a parameter for the Gaussian random number generator (1.0 is recommended).
 * Sfile is the prefix of the name for S files; Rfile is the name of the R file.
 * The last argument enables generation of only the R file or only the S files.
 */
public static void main(String[] args) throws IOException {
    if (args.length < 8) {
        System.out.println(
                "Usage: hadoop jar svm_scale.jar svm_scale.Svm_scale <Ratio of nS:nR> <nR> <dS> <dR> <Variance> <0 for binary | 1 for txt> <Sfile> <Rfile> [1 for R alone | 2 for S alone]");
        System.exit(1);
    }
    int r = Integer.parseInt(args[0]);
    int nR = Integer.parseInt(args[1]);
    int dS = Integer.parseInt(args[2]); // includes target
    int dR = Integer.parseInt(args[3]);
    // Parse as a double: the usage string recommends 1.0, which Integer.parseInt would reject
    double var = Double.parseDouble(args[4]);
    System.out.println("r " + r + " nR " + nR + " dS " + dS + " dR " + dR + " var " + var);

    int dim = dS + dR - 1;
    int wlenr = dS - 1;
    double[] w = new double[dim];
    DataTupleS si = new DataTupleS(wlenr);
    DataTupleR ri = new DataTupleR(dR);
    double normsq = 0.0;
    Random myrand = new Random();
    myrand.setSeed(0xDEADBEEF);
    int onlyflag = 0;
    if (args.length > 8) {
        onlyflag = Integer.parseInt(args[8]);
    }

    // generate the model: a random unit-norm weight vector w
    for (int i = dim - 1; i >= 0; i--) {
        w[i] = myrand.nextDouble() - 0.5;
        normsq += w[i] * w[i];
    }
    double norm = Math.sqrt(normsq);
    for (int i = dim - 1; i >= 0; i--) {
        w[i] /= norm;
    }

    int format = Integer.parseInt(args[5]); // 0 for binary | 1 for text
    Path pathS = new Path(args[6]);
    Path pathRdir = new Path("text" + pathS.toString());
    Path pathR = new Path(args[7]);
    Path pathSdir = new Path("text" + pathR.toString());
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(pathS)) {
        fs.delete(pathS, true);
    }
    if (fs.exists(pathR)) {
        fs.delete(pathR, true);
    }
    if (fs.exists(pathRdir)) {
        fs.delete(pathRdir, true);
    }
    if (fs.exists(pathSdir)) {
        fs.delete(pathSdir, true);
    }

    IntWritable sidkey = new IntWritable();
    IntWritable ridkey = new IntWritable();
    SequenceFile.Writer writerS = null;
    SequenceFile.Writer writerR = null;
    if (format == 0) {
        if (onlyflag != 1) {
            writerS = SequenceFile.createWriter(fs, conf, pathS, sidkey.getClass(), si.getClass());
        }
        if (onlyflag != 2) {
            writerR = SequenceFile.createWriter(fs, conf, pathR, ridkey.getClass(), ri.getClass());
        }
    }
    FSDataOutputStream textS = null;
    FSDataOutputStream textR = null;
    if (format == 1) {
        if (onlyflag != 1) {
            // need to create it in its own dir
            fs.mkdirs(pathRdir);
            textS = fs.create(new Path(pathRdir.toString() + "/R"));
        }
        if (onlyflag != 2) {
            fs.mkdirs(pathSdir);
            textR = fs.create(new Path(pathSdir.toString() + "/S"));
        }
    }

    // Sample for S and R
    for (int k = 1; k <= nR; k++) {
        // generate a random vector for XR
        for (int i = 0; i < dR; i++) {
            ri.xr[i] = myrand.nextGaussian();
        }
        si.forkey = k;
        for (int rr = 1; rr <= r; rr++) {
            // assign this point to a cluster
            si.label = (myrand.nextInt(2) % 2 == 0) ? -1 : 1;
            // generate a random vector for XS
            for (int i = 0; i < wlenr; i++) {
                si.xs[i] = myrand.nextGaussian();
            }
            // transform by w
            for (int i = 0; i < wlenr; i++) {
                si.xs[i] *= w[i];
            }
            sidkey.set(r * (k - 1) + rr);
            if (onlyflag != 1) {
                // output to S (SID Target ForeignKey XS)
                if (format == 0) {
                    writerS.append(sidkey, si);
                } else {
                    textS.writeBytes(si.writeString(sidkey.get()) + "\n");
                }
            }
        }
        for (int i = 0; i < dR; i++) {
            ri.xr[i] *= w[wlenr + i];
        }
        ridkey.set(k);
        if (onlyflag != 2) {
            // output to R (RID XR)
            if (format == 0) {
                writerR.append(ridkey, ri);
            } else {
                textR.writeBytes(ri.writeString(ridkey.get()) + "\n");
            }
        }
        // status
        if (k % 1000000 == 0) {
            System.out.println("Fin k = " + k);
        }
    }

    if (onlyflag != 1) {
        if (format == 0) {
            writerS.close();
        } else {
            textS.close();
        }
    }
    if (onlyflag != 2) {
        if (format == 0) {
            writerR.close();
        } else {
            textR.close();
        }
    }
}