List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
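Sets the value of this IntWritable. Because Hadoop Writables are mutable containers, a single instance is typically allocated once and refreshed with set(int) for each record, avoiding per-record allocations. A minimal standalone sketch of the pattern (class and variable names here are illustrative, not taken from the examples below):

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetSketch {
    public static void main(String[] args) {
        // Allocate one container and reuse it; set(int) replaces the held value.
        IntWritable value = new IntWritable(0);
        for (int i = 1; i <= 3; i++) {
            value.set(i);                    // overwrite in place; no new object
            System.out.println(value.get()); // prints 1, 2, 3
        }
    }
}

This reuse pattern is why the examples below repeatedly call set(...) on the same mask, wvalue, sidkey, or exValue instance instead of constructing a new IntWritable each time.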
From source file: org.shadowmask.engine.hive.udf.UDFMobileTest.java
License: Apache License
@Test
public void testUDFMobile() {
    UDFMobile udfMobile = new UDFMobile();
    Text mobile = new Text("13566668888");
    IntWritable mask = new IntWritable(0);
    Text result = udfMobile.evaluate(mobile, mask);
    assertEquals("13566668888", result.toString());
    mask.set(1);
    result = udfMobile.evaluate(mobile, mask);
    assertEquals("1356666****", result.toString());
    mask.set(2);
    result = udfMobile.evaluate(mobile, mask);
    assertEquals("135********", result.toString());
    mask.set(3);
    result = udfMobile.evaluate(mobile, mask);
    assertEquals("***********", result.toString());
    mobile = null;
    result = udfMobile.evaluate(mobile, mask);
    assertNull(result);
}
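The assertions above pin down the masking ladder: level 0 leaves the number intact, level 1 hides the last four digits, level 2 keeps only the three-digit prefix, and level 3 hides everything. A hypothetical re-implementation of that rule for 11-digit numbers, inferred from the assertions (this is a sketch, not the actual ShadowMask UDFMobile code):

// Hypothetical helper reproducing the behaviour the test asserts.
static String maskMobile(String mobile, int mask) {
    if (mobile == null) {
        return null; // matches the null-input behaviour asserted above
    }
    switch (mask) {
    case 0:
        return mobile;                              // no masking
    case 1:
        return mobile.substring(0, 7) + "****";     // hide the last 4 digits
    case 2:
        return mobile.substring(0, 3) + "********"; // keep the 3-digit prefix
    default:
        return "***********";                       // hide all 11 digits
    }
}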
From source file: org.shadowmask.engine.hive.udf.UDFTimestampTest.java
License: Apache License
@Test
public void testUDFTimestamp() {
    UDFTimestamp udfTimestamp = new UDFTimestamp();
    long time = DateTime.parse("2016-09-18T10:30:32.222").getMillis();
    TimestampWritable timestamp = new TimestampWritable();
    timestamp.setTime(time);
    IntWritable mask = new IntWritable(0);
    TimestampWritable result = udfTimestamp.evaluate(timestamp, mask);
    assertEquals(time, result.getTimestamp().getTime());
    mask.set(1);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect1 = DateTime.parse("2016-09-18T10:30:32.000").getMillis();
    assertEquals(expect1, result.getTimestamp().getTime());
    mask.set(2);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect2 = DateTime.parse("2016-09-18T10:30:00.000").getMillis();
    assertEquals(expect2, result.getTimestamp().getTime());
    mask.set(3);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect3 = DateTime.parse("2016-09-18T10:00:00.000").getMillis();
    assertEquals(expect3, result.getTimestamp().getTime());
    mask.set(4);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect4 = DateTime.parse("2016-09-18T00:00:00.000").getMillis();
    assertEquals(expect4, result.getTimestamp().getTime());
    mask.set(5);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect5 = DateTime.parse("2016-09-01T00:00:00.000").getMillis();
    assertEquals(expect5, result.getTimestamp().getTime());
    mask.set(6);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect6 = DateTime.parse("2016-01-01T00:00:00.000").getMillis();
    assertEquals(expect6, result.getTimestamp().getTime());
    mask.set(7);
    result = udfTimestamp.evaluate(timestamp, mask);
    long expect7 = DateTime.parse("1901-01-01T00:00:00.000").getMillis();
    assertEquals(expect7, result.getTimestamp().getTime());
    timestamp = null;
    result = udfTimestamp.evaluate(timestamp, mask);
    assertNull(result);
}
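Each mask level generalizes the timestamp one step further: 1 drops milliseconds, 2 drops seconds, 3 drops minutes, 4 drops the time of day, 5 the day of month, 6 the month, and 7 collapses everything to 1901-01-01. A hypothetical sketch of that truncation ladder in Joda-Time (which the test already uses); this is inferred from the expected values, not the actual UDFTimestamp source:

// Hypothetical helper reproducing the truncation the test asserts (org.joda.time.DateTime).
static DateTime truncate(DateTime t, int mask) {
    switch (mask) {
    case 0: return t;
    case 1: return t.withMillisOfSecond(0);                       // second precision
    case 2: return t.withSecondOfMinute(0).withMillisOfSecond(0); // minute precision
    case 3: return t.withTime(t.getHourOfDay(), 0, 0, 0);         // hour precision
    case 4: return t.withTimeAtStartOfDay();                      // day precision
    case 5: return t.withDayOfMonth(1).withTimeAtStartOfDay();    // month precision
    case 6: return t.withDayOfYear(1).withTimeAtStartOfDay();     // year precision
    default: return new DateTime(1901, 1, 1, 0, 0);               // fully suppressed
    }
}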
From source file: org.swjtu.helloworldcn.APCDriver.java
License: Apache License
/**
 * @param conf
 *            the Configuration to be used
 * @param inputPath
 *            the Path to the input tuples directory
 * @param outputCalc
 *            the Path to the output directory
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
private void parallelUpdateRA(Configuration conf, Path inputPath, Path outputCalc)
        throws IOException, ClassNotFoundException, InterruptedException {
    Boolean dn = false;
    FileSystem fs = FileSystem.get(conf);
    int i = -1;
    Vector diagAplusR = null;
    while (!dn) {
        i = i + 1;
        Path outputPath = new Path(outputCalc, "parallelUpdateRA-" + i);
        if (i > 1) {
            HadoopUtil.delete(conf, new Path(outputCalc, "parallelUpdateRA-" + (i - 2)));
        }
        inputPath = APCParallelUpdateRAJob.runJob(inputPath, outputPath, numDims, lamda);
        diagAplusR = APCGetDiagAplusRJob.runJob(inputPath, numDims);
        // Build an indicator vector E: 1.0 where diag(A)+R is positive, else 0.0
        RandomAccessSparseVector E = new RandomAccessSparseVector(numDims, 100);
        Iterator<Element> iter = diagAplusR.iterateNonZero();
        int K = 0;
        while (iter.hasNext()) {
            Element element = iter.next();
            if (element.get() > 0) {
                E.set(element.index(), 1.0);
                K++;
            } else {
                E.set(element.index(), 0.0);
            }
        }
        // Keep a sliding window of the last convits indicator vectors
        int index = i % convits;
        if (i >= convits) {
            e.remove(index);
        }
        e.add(index, E);
        if (i >= convits || i >= maxIterations) {
            RandomAccessSparseVector se = new RandomAccessSparseVector(numDims, 100);
            Iterator<RandomAccessSparseVector> iterator = e.iterator();
            while (iterator.hasNext()) {
                RandomAccessSparseVector v = iterator.next();
                se = (RandomAccessSparseVector) se.plus(v);
            }
            // Converged if every exemplar decision was stable for convits iterations
            iter = se.iterateNonZero();
            while (iter.hasNext()) {
                Element element = iter.next();
                if (element.get() != convits) {
                    unconverged = true;
                    break;
                } else {
                    unconverged = false;
                }
            }
            if ((!unconverged && K > 0) || i == maxIterations) {
                dn = true;
            }
        }
    }
    // Get exemplars
    exemplars = new ArrayList<Integer>();
    Path outputExemplasPath = new Path(new Path(outputCalc, "exemplars"), "result");
    IntWritable exValue = new IntWritable();
    SequenceFile.Writer writerExemplas = SequenceFile.createWriter(fs, conf, outputExemplasPath,
            NullWritable.get().getClass(), exValue.getClass());
    Iterator<Element> iter = diagAplusR.iterateNonZero();
    while (iter.hasNext()) {
        Element element = iter.next();
        if (element.get() > 0) {
            exemplars.add(element.index());
            exValue.set(element.index());
            writerExemplas.append(NullWritable.get(), exValue);
        }
    }
    writerExemplas.close();
    clusteringResult = APCGetClusteringResultJob.runJob(inputPath, outputExemplasPath, numDims);
}
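The exemplars file written above stores (NullWritable, IntWritable) pairs, with exValue.set(...) refreshing a single value instance before each append. Reading it back follows the mirror-image pattern; a minimal sketch, with the path and configuration assumed rather than taken from APCDriver:

// Minimal sketch: read back (NullWritable, IntWritable) pairs such as the
// exemplars file written above. The path below is illustrative.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("outputCalc/exemplars/result");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
try {
    NullWritable key = NullWritable.get();
    IntWritable value = new IntWritable(); // reused for every record
    while (reader.next(key, value)) {
        System.out.println(value.get());   // one exemplar index per record
    }
} finally {
    reader.close();
}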
From source file: org.terrier.structures.collections.TestFSOrderedMapFile.java
License: Mozilla Public License
@Before
public void setUp() throws Exception {
    file = tf.newFile("testFSOMapfile" + FSOrderedMapFile.USUAL_EXTENSION).toString();
    MapFileWriter w = FSOrderedMapFile.mapFileWrite(file);
    FixedSizeWriteableFactory<Text> keyFactory = new FixedSizeTextFactory(20);
    int offset = 0;
    for (String key : testKeys) {
        Text wkey = keyFactory.newInstance();
        IntWritable wvalue = new IntWritable();
        wkey.set(key);
        wvalue.set(offset);
        w.write(wkey, wvalue);
        key2id.put(key, offset);
        offset++;
    }
    w.close();
}
From source file: org.terrier.structures.collections.TestFSOrderedMapFile.java
License: Mozilla Public License
@Test
public void testDupSuppres() throws Exception {
    String file = tf.newFile("testDupSuppres.fsomapfile").toString();
    FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
    FixedSizeWriteableFactory<IntWritable> valueFactory = new FixedSizeIntWritableFactory();
    MapFileWriter m = new FSOrderedMapFile.MultiFSOMapWriter(file, 2, keyFactory, valueFactory, true);
    Text k;
    IntWritable v;
    k = keyFactory.newInstance();
    k.set("a");
    v = valueFactory.newInstance();
    v.set(0);
    m.write(k, v);
    k = keyFactory.newInstance();
    k.set("b");
    v = valueFactory.newInstance();
    v.set(1);
    m.write(k, v);
    ((Flushable) m).flush();
    k = keyFactory.newInstance();
    k.set("a");
    v = valueFactory.newInstance();
    v.set(2);
    m.write(k, v);
    k = keyFactory.newInstance();
    k.set("c");
    v = valueFactory.newInstance();
    v.set(3);
    m.write(k, v);
    m.close();
    FSOrderedMapFile<Text, IntWritable> map = new FSOrderedMapFile<Text, IntWritable>(file, false,
            keyFactory, valueFactory);
    assertEquals(3, map.size());
    k = keyFactory.newInstance();
    k.set("a");
    assertEquals(0, map.get(k).get()); // first write of "a" wins; the duplicate is suppressed
    k = keyFactory.newInstance();
    k.set("b");
    assertEquals(1, map.get(k).get());
    k = keyFactory.newInstance();
    k.set("c");
    assertEquals(3, map.get(k).get());
}
From source file: org.terrier.structures.collections.TestFSOrderedMapFile.java
License: Mozilla Public License
@Test
public void testInMemoryJDKCollection() throws Exception {
    FixedSizeTextFactory keyFactory = new FixedSizeTextFactory(20);
    SortedMap<Text, IntWritable> map = new TreeMap<Text, IntWritable>();
    int offset = 0;
    for (String key : testKeys) {
        Text wkey = keyFactory.newInstance();
        IntWritable wvalue = new IntWritable();
        wkey.set(key);
        wvalue.set(offset);
        map.put(wkey, wvalue);
        offset++;
    }
    checkKeys(keyFactory, map);
}
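IntWritable implements WritableComparable, which is what lets it live in a TreeMap and be compared by value, as shown above. Because set(int) mutates the instance, comparison and equality always reflect the current value; a short sketch:

IntWritable a = new IntWritable(1);
IntWritable b = new IntWritable(2);
System.out.println(a.compareTo(b)); // negative: 1 < 2
b.set(1);                           // mutate b in place
System.out.println(a.equals(b));    // true: equality tracks the current value

For the same reason, keys placed in a sorted collection should not be mutated afterwards; note how the test above creates a fresh Text and IntWritable per entry rather than reusing one instance.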
From source file: svm_scale.Svm_scale.java
License: Apache License
/**
 * Writes out the files for S and R in space-separated-values format.
 * S is the entity table (the outer table for the join);
 * the schema of S is (SID Target ForeignKey XS).
 * R is the attribute table (the inner table for the join);
 * the schema of R is (RID XR).
 * nS is the number of tuples in S; nR is the number of tuples in R.
 * dS is the number of features in S (including the target); dR is the number of features in R.
 * Variance is a parameter for the Gaussian random number generator (1.0 is recommended).
 * Sfile is the prefix of the name for S files; Rfile is the name of the R file.
 * The last argument enables generation of only the R file or only the S files.
 */
public static void main(String[] args) throws IOException {
    if (args.length < 8) {
        System.out.println(
                "Usage: hadoop jar svm_scale.jar svm_scale.Svm_scale <Ratio of nS:nR> <nR> <dS> <dR> <Variance> <0 for binary | 1 for txt> <Sfile> <Rfile> [1 for R alone | 2 for S alone]");
        System.exit(1);
    }
    int r = Integer.parseInt(args[0]);
    int nR = Integer.parseInt(args[1]);
    int dS = Integer.parseInt(args[2]); // includes target
    int dR = Integer.parseInt(args[3]);
    // Parse as a double: the usage string recommends 1.0, which Integer.parseInt would reject
    double var = Double.parseDouble(args[4]);
    System.out.println("r " + r + " nR " + nR + " dS " + dS + " dR " + dR + " var " + var);

    int dim = dS + dR - 1;
    int wlenr = dS - 1;
    double[] w = new double[dim];
    DataTupleS si = new DataTupleS(wlenr);
    DataTupleR ri = new DataTupleR(dR);
    double normsq = 0.0;
    Random myrand = new Random();
    myrand.setSeed(0xDEADBEEF);
    int onlyflag = 0;
    if (args.length > 8) {
        onlyflag = Integer.parseInt(args[8]);
    }

    // generate the model: a random unit-norm weight vector w
    for (int i = dim - 1; i >= 0; i--) {
        w[i] = myrand.nextDouble() - 0.5;
        normsq += w[i] * w[i];
    }
    double norm = Math.sqrt(normsq);
    for (int i = dim - 1; i >= 0; i--) {
        w[i] /= norm;
    }

    int format = Integer.parseInt(args[5]); // 0 for binary | 1 for text
    Path pathS = new Path(args[6]);
    Path pathRdir = new Path("text" + pathS.toString());
    Path pathR = new Path(args[7]);
    Path pathSdir = new Path("text" + pathR.toString());
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(pathS)) {
        fs.delete(pathS, true);
    }
    if (fs.exists(pathR)) {
        fs.delete(pathR, true);
    }
    if (fs.exists(pathRdir)) {
        fs.delete(pathRdir, true);
    }
    if (fs.exists(pathSdir)) {
        fs.delete(pathSdir, true);
    }

    IntWritable sidkey = new IntWritable();
    IntWritable ridkey = new IntWritable();
    SequenceFile.Writer writerS = null;
    SequenceFile.Writer writerR = null;
    if (format == 0) {
        if (onlyflag != 1) {
            writerS = SequenceFile.createWriter(fs, conf, pathS, sidkey.getClass(), si.getClass());
        }
        if (onlyflag != 2) {
            writerR = SequenceFile.createWriter(fs, conf, pathR, ridkey.getClass(), ri.getClass());
        }
    }
    FSDataOutputStream textS = null;
    FSDataOutputStream textR = null;
    if (format == 1) {
        if (onlyflag != 1) {
            // need to create it in its own dir
            fs.mkdirs(pathRdir);
            textS = fs.create(new Path(pathRdir.toString() + "/R"));
        }
        if (onlyflag != 2) {
            fs.mkdirs(pathSdir);
            textR = fs.create(new Path(pathSdir.toString() + "/S"));
        }
    }

    // Sample for S and R
    for (int k = 1; k <= nR; k++) {
        // generate a random vector for XR
        for (int i = 0; i < dR; i++) {
            ri.xr[i] = myrand.nextGaussian();
        }
        si.forkey = k;
        for (int rr = 1; rr <= r; rr++) {
            // assign this point to a cluster
            si.label = (myrand.nextInt(2) % 2 == 0) ? -1 : 1;
            // generate a random vector for XS
            for (int i = 0; i < wlenr; i++) {
                si.xs[i] = myrand.nextGaussian();
            }
            // transform by w
            for (int i = 0; i < wlenr; i++) {
                si.xs[i] *= w[i];
            }
            sidkey.set(r * (k - 1) + rr);
            if (onlyflag != 1) {
                // output to S (SID Target ForeignKey XS)
                if (format == 0) {
                    writerS.append(sidkey, si);
                } else {
                    textS.writeBytes(si.writeString(sidkey.get()) + "\n");
                }
            }
        }
        for (int i = 0; i < dR; i++) {
            ri.xr[i] *= w[wlenr + i];
        }
        ridkey.set(k);
        if (onlyflag != 2) {
            // output to R (RID XR)
            if (format == 0) {
                writerR.append(ridkey, ri);
            } else {
                textR.writeBytes(ri.writeString(ridkey.get()) + "\n");
            }
        }
        // status
        if (k % 1000000 == 0) {
            System.out.println("Fin k = " + k);
        }
    }

    if (onlyflag != 1) {
        if (format == 0) {
            writerS.close();
        } else {
            textS.close();
        }
    }
    if (onlyflag != 2) {
        if (format == 0) {
            writerR.close();
        } else {
            textR.close();
        }
    }
}