List of usage examples for the org.apache.hadoop.io.Text constructor
public Text(byte[] utf8)
From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java
License:Apache License
/**
 * Runs the reducer once, for key "1", and verifies the records written to the
 * mocked {@code MultipleOutputs}. All expected records target "pg/bucket-0".
 */
@Test
public void One_PG_N_Items() throws Exception {
  final String bucket = "pg/bucket-0";

  // Record phase: the writes (and the final close) the reducer is expected to perform.
  MultipleOutputs<IntArrayWritable, IntMatrixWritable> outputs = createMock(MultipleOutputs.class);
  outputs.write(newIAW(1), EmptyImw, bucket);

  IntMatrixWritable tidsOfItem0 =
      new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8));
  outputs.write(newIAW(0), tidsOfItem0, bucket);

  IntMatrixWritable tidsOfItem1 = new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6));
  outputs.write(newIAW(1), tidsOfItem1, bucket);

  IntMatrixWritable tidsOfItem3 = new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9));
  outputs.write(newIAW(3), tidsOfItem3, bucket);

  outputs.write(EmptyIaw, EmptyImw, bucket);
  outputs.close();

  // The context only has to hand out a configuration and a task attempt id.
  Reducer.Context context = createMock(Reducer.Context.class);
  EasyMock.expect(context.getConfiguration()).andReturn(createConfiguration()).anyTimes();
  EasyMock.expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

  EasyMock.replay(context, outputs);

  // Replay phase: setup first, then swap in the mocked outputs before reducing.
  ComputeTidListReducer underTest = new ComputeTidListReducer();
  underTest.setup(context);
  setField(underTest, "mos", outputs);

  underTest.reduce(new Text("1"), createTestInput_NItems(), context);
  underTest.cleanup(context);

  EasyMock.verify(outputs);
}
From source file:be.uantwerpen.adrem.bigfim.ComputeTidListReducerTest.java
License:Apache License
/**
 * Runs the reducer for keys "1" and "2" and verifies the records written to the
 * mocked {@code MultipleOutputs}. Expected records target "pg/bucket-0" and
 * "pg/bucket-1".
 */
@Test
public void N_PG_N_Items() throws Exception {
  final String firstBucket = "pg/bucket-0";
  final String secondBucket = "pg/bucket-1";

  MultipleOutputs<IntArrayWritable, IntMatrixWritable> outputs = createMock(MultipleOutputs.class);

  // Expected records for the first bucket.
  outputs.write(newIAW(1), EmptyImw, firstBucket);

  IntMatrixWritable firstBucketItem0 =
      new IntMatrixWritable(newIAW(0, 1, 2, 4, 7, 9), newIAW(0, 1, 2, 3, 5, 6, 8));
  outputs.write(newIAW(0), firstBucketItem0, firstBucket);

  IntMatrixWritable firstBucketItem1 = new IntMatrixWritable(newIAW(1, 2, 3), newIAW(4, 5, 6));
  outputs.write(newIAW(1), firstBucketItem1, firstBucket);

  IntMatrixWritable firstBucketItem3 = new IntMatrixWritable(newIAW(4, 7, 9), newIAW(4, 7, 9));
  outputs.write(newIAW(3), firstBucketItem3, firstBucket);

  outputs.write(EmptyIaw, EmptyImw, firstBucket);

  // Expected records for the second bucket.
  outputs.write(newIAW(2), EmptyImw, secondBucket);

  IntMatrixWritable secondBucketItem1 =
      new IntMatrixWritable(newIAW(1, 4, 7, 8), newIAW(1, 5, 6, 8));
  outputs.write(newIAW(1), secondBucketItem1, secondBucket);

  IntMatrixWritable secondBucketItem2 =
      new IntMatrixWritable(newIAW(3, 5, 7), newIAW(1, 2, 3, 4, 5, 6, 7, 8, 9));
  outputs.write(newIAW(2), secondBucketItem2, secondBucket);

  outputs.write(EmptyIaw, EmptyImw, secondBucket);
  outputs.close();

  // The context only has to hand out a configuration and a task attempt id.
  Reducer.Context context = createMock(Reducer.Context.class);
  EasyMock.expect(context.getConfiguration()).andReturn(createConfiguration()).anyTimes();
  EasyMock.expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID()).anyTimes();

  EasyMock.replay(context, outputs);

  // Setup first, then swap in the mocked outputs before reducing.
  ComputeTidListReducer underTest = new ComputeTidListReducer();
  underTest.setup(context);
  setField(underTest, "mos", outputs);

  underTest.reduce(new Text("1"), createTestInput_NItems(), context);
  underTest.reduce(new Text("2"), createTestInput_NItems2(), context);
  underTest.cleanup(context);

  EasyMock.verify(outputs);
}
From source file:be.uantwerpen.adrem.disteclat.ItemReaderReducer.java
License:Apache License
private void reportItemsWithLargeSupport(Map<Integer, IntArrayWritable[]> map, Context context) throws IOException, InterruptedException { for (Entry<Integer, IntArrayWritable[]> entry : map.entrySet()) { int support = 0; for (IntArrayWritable iaw : entry.getValue()) { support += iaw.get().length; }//from w w w. j av a2s . c o m if (support < minSup) { itemSupports.remove(entry.getKey()); continue; } final Integer item = entry.getKey(); final IntArrayWritable[] tids = entry.getValue(); // write the item to the short fis file mos.write(new IntWritable(1), new Text(item + "(" + support + ")"), shortFisFilename); // write the item with the tidlist mos.write(OSingletonsTids, new IntWritable(item), new IntMatrixWritable(tids)); } }
From source file:be.uantwerpen.adrem.disteclat.ItemReaderReducer.java
License:Apache License
/** * Writes the singletons order to the file OSingletonsOrder. * /* w w w. ja v a 2s. c o m*/ * @param sortedSingletons * the sorted singletons * @throws IOException * @throws InterruptedException */ private void writeSingletonsOrders(List<Integer> sortedSingletons) throws IOException, InterruptedException { StringBuilder builder = new StringBuilder(); for (Integer singleton : sortedSingletons) { builder.append(singleton).append(" "); } Text order = new Text(builder.substring(0, builder.length() - 1)); mos.write(OSingletonsOrder, EmptyKey, order); }
From source file:be.uantwerpen.adrem.eclat.EclatMinerReducerSetCount.java
License:Apache License
/**
 * Prints the {@code total} field to standard output and writes it to the
 * context under the key "Total".
 */
@Override
public void cleanup(Context context) throws IOException, InterruptedException {
  final String summary = "Total: " + total;
  System.out.println(summary);

  final Text key = new Text("Total");
  final LongWritable value = new LongWritable(total);
  context.write(key, value);
}
From source file:be.uantwerpen.adrem.eclat.util.ItemsetLengthCountReporter.java
License:Apache License
@Override public void close() { try {// ww w .ja v a 2 s .com for (Entry<Integer, MutableLong> entry : counts.entrySet()) { context.write(new Text("" + entry.getKey()), new LongWritable(entry.getValue().longValue())); } } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:be.uantwerpen.adrem.eclat.util.ItemsetStringReporter.java
License:Apache License
@Override public void report(int[] itemset, int support) { for (int i : itemset) { builder.append(i + " "); }//w w w .j a va 2 s . c om builder.append("(" + support + ")\n"); count++; if (count % MAX_SETS_BUFFER == 0) { try { context.write(new Text("" + count), new Text(builder.toString())); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } System.out.println("wrote " + count + " itemsets"); builder.setLength(0); count = 0; } }
From source file:be.uantwerpen.adrem.eclat.util.ItemsetStringReporter.java
License:Apache License
@Override public void close() { try {// w w w . j a va 2 s . co m context.write(new Text("" + count), new Text(builder.toString())); System.out.println("wrote " + count + " itemsets"); builder.setLength(0); count = 0; } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:be.uantwerpen.adrem.eclat.util.PrefixItemTIDsReporter.java
License:Apache License
/**
 * Reports an itemset to the context.
 *
 * <p>Itemsets shorter than {@code prefixLength} are written under
 * {@code ShortKey} as a matrix of the itemset and its support. Every other
 * itemset is keyed by its space-separated prefix, i.e. all items except the
 * last. The value is a matrix whose first row holds the last item and whose
 * remaining rows are the rows returned by {@code computeTids}.
 *
 * <p>A one-item itemset that is not shorter than {@code prefixLength} gets an
 * empty prefix key. Write failures are reported on standard error and not
 * rethrown.
 *
 * @param itemset items of the set to report
 * @param support support of the set; only used for short itemsets
 */
@Override
public void report(int[] itemset, int support) {
  if (itemset.length < prefixLength) {
    try {
      context.write(ShortKey,
          new IntMatrixWritable(IntArrayWritable.of(itemset), IntArrayWritable.of(support)));
    } catch (Exception e) {
      e.printStackTrace();
    }
    return;
  }

  // Build the prefix from the leading items directly. Truncating a buffer that
  // also held the last item produced a negative length for one-item itemsets.
  final int lastIndex = itemset.length - 1;
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < lastIndex; i++) {
    if (i > 0) {
      sb.append(' ');
    }
    sb.append(itemset[i]);
  }
  // -1 is kept as the marker for "no last item" on an empty itemset.
  final int lastItem = lastIndex >= 0 ? itemset[lastIndex] : -1;

  Text key = new Text(sb.toString());

  TidList tids = computeTids(itemset);
  IntArrayWritable[] iaw = new IntArrayWritable[tids.tids.length + 1];
  iaw[0] = IntArrayWritable.of(lastItem);
  for (int i = 1; i < iaw.length; i++) {
    iaw[i] = IntArrayWritable.of(tids.tids[i - 1]);
  }

  try {
    context.write(key, new IntMatrixWritable(iaw));
  } catch (Exception e) {
    e.printStackTrace();
  }
}
From source file:be.uantwerpen.adrem.eclat.util.TreeStringReporter.java
License:Apache License
@Override public void report(int[] itemset, int support) { if (prevSet == null) { for (int i = 0; i < itemset.length - 1; i++) { builder.append(itemset[i]).append(SEPARATOR); }//from w w w . j a va 2 s. c o m builder.append(itemset[itemset.length - 1]); } else { int depth = 0; while (depth < itemset.length && depth < prevSet.length && itemset[depth] == prevSet[depth]) { depth++; } for (int i = prevSet.length - depth; i > 0; i--) { builder.append(SYMBOL); } for (int i = depth; i < itemset.length - 1; i++) { builder.append(itemset[i]).append(SEPARATOR); } if (depth < itemset.length) { builder.append(itemset[itemset.length - 1]); } } builder.append(OPENSUP).append(support).append(CLOSESUP); prevSet = Arrays.copyOf(itemset, itemset.length); count++; if (count % MAX_SETS_BUFFER == 0) { try { context.write(new Text("" + count), new Text(builder.toString())); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } System.out.println("wrote " + count + " compressed itemsets"); builder.setLength(0); count = 0; } }