List of usage examples for the org.apache.hadoop.util.hash.Hash.JENKINS_HASH field.

Field declaration:

    int JENKINS_HASH

The examples below show how open-source projects use this constant.
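For orientation before the project examples: Hash.JENKINS_HASH selects Bob Jenkins' hash as the algorithm used by Hadoop's hashing and Bloom filter utilities. The sketch below is illustrative only; the key string, vector size (1024), and hash count (5) are made-up values, not taken from the examples that follow.

    import org.apache.hadoop.util.bloom.BloomFilter;
    import org.apache.hadoop.util.bloom.Key;
    import org.apache.hadoop.util.hash.Hash;

    public class JenkinsHashSketch {
        public static void main(String[] args) {
            // Obtain the Jenkins hash implementation and hash some bytes
            // (-1 is the conventional "no seed" initial value).
            Hash jenkins = Hash.getInstance(Hash.JENKINS_HASH);
            int h = jenkins.hash("example-key".getBytes(), -1);
            System.out.println("jenkins hash = " + h);

            // Pass JENKINS_HASH as the hashType argument when building a Bloom filter.
            BloomFilter filter = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            filter.add(new Key("example-key".getBytes()));
            System.out.println("member? " + filter.membershipTest(new Key("example-key".getBytes())));
        }
    }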
From source file: edu.isi.karma.controller.command.publish.PublishRDFCommand.java
License: Apache License
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
    // save the preferences
    savePreferences(workspace);
    Worksheet worksheet = workspace.getWorksheet(worksheetId);
    this.worksheetName = worksheet.getTitle();

    // Prepare the file path and names
    final String rdfFileName = workspace.getCommandPreferencesId() + worksheetId + ".ttl";
    final String rdfFileLocalPath = ServletContextParameterMap
            .getParameterValue(ContextParameter.RDF_PUBLISH_DIR) + rdfFileName;

    // Get the alignment for this worksheet
    Alignment alignment = AlignmentManager.Instance()
            .getAlignment(AlignmentManager.Instance().constructAlignmentId(workspace.getId(), worksheetId));
    if (alignment == null) {
        logger.info("Alignment is NULL for " + worksheetId);
        return new UpdateContainer(new ErrorUpdate("Please align the worksheet before generating RDF!"));
    }

    // Generate the KR2RML data structures for the RDF generation
    final ErrorReport errorReport = new ErrorReport();
    KR2RMLMappingGenerator mappingGen = null;
    String url = worksheet.getMetadataContainer().getWorksheetProperties().getPropertyValue(Property.modelUrl);
    String modelContext = worksheet.getMetadataContainer().getWorksheetProperties()
            .getPropertyValue(Property.modelContext);
    TripleStoreUtil utilObj = new TripleStoreUtil();
    String modelRepoUrl = worksheet.getMetadataContainer().getWorksheetProperties()
            .getPropertyValue(Property.modelRepository);
    modelRepoUrl = modelRepoUrl == null || modelRepoUrl.isEmpty() ? TripleStoreUtil.defaultModelsRepoUrl
            : modelRepoUrl;
    Map<String, String> bloomfilterMapping = new HashMap<String, String>();
    boolean result = true;
    try {
        mappingGen = new KR2RMLMappingGenerator(workspace, worksheet, alignment, worksheet.getSemanticTypes(),
                rdfSourcePrefix, rdfSourceNamespace, Boolean.valueOf(addInverseProperties), errorReport);
    } catch (KarmaException e) {
        logger.error("Error occured while generating RDF!", e);
        return new UpdateContainer(new ErrorUpdate("Error occured while generating RDF: " + e.getMessage()));
    }
    KR2RMLMapping mapping = mappingGen.getKR2RMLMapping();
    if (url != null && !url.trim().isEmpty() && modelContext != null && !modelContext.trim().isEmpty()) {
        try {
            File tmp = new File("tmp");
            PrintWriter pw = new PrintWriter(tmp);
            pw.println(utilObj.getMappingFromTripleStore(modelRepoUrl, modelContext, url));
            pw.close();
            Model model = WorksheetR2RMLJenaModelParser.loadSourceModelIntoJenaModel(tmp.toURI().toURL());
            tmp.delete();
            R2RMLMappingIdentifier identifier = new R2RMLMappingIdentifier(mapping.getId().getName(),
                    new URL(url));
            WorksheetR2RMLJenaModelParser parser = new WorksheetR2RMLJenaModelParser(model, identifier);
            mapping = parser.parse();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    logger.debug(mapping.toString());
    StringWriter sw = new StringWriter();

    // Generate the RDF using KR2RML data structures
    long start = 0;
    try {
        List<KR2RMLRDFWriter> writers = new ArrayList<KR2RMLRDFWriter>();
        File f = new File(rdfFileLocalPath);
        File parentDir = f.getParentFile();
        parentDir.mkdirs();
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8"));
        N3KR2RMLRDFWriter writer = new N3KR2RMLRDFWriter(
                new URIFormatter(workspace.getOntologyManager(), errorReport), new PrintWriter(bw));
        writer.setBaseURI(rdfSourceNamespace);
        writers.add(writer);
        if (generateBloomFilters)
            writers.add(new BloomFilterKR2RMLRDFWriter(new PrintWriter(sw), mapping.getId(), false,
                    this.rdfSourceNamespace));
        KR2RMLWorksheetRDFGenerator rdfGen = new KR2RMLWorksheetRDFGenerator(worksheet, workspace.getFactory(),
                workspace.getOntologyManager(), writers, false, mapping, errorReport);
        rdfGen.generateRDF(true);
        logger.info("RDF written to file: " + rdfFileLocalPath);
        if (saveToStore) {
            // take the contents of the RDF file and save them to the store
            logger.info("Using Jena DB:" + hostName + "/" + dbName + " user=" + userName);
            saveToStore(rdfFileLocalPath);
        }
        start = System.currentTimeMillis();
        if (generateBloomFilters) {
            JSONObject obj = new JSONObject(sw.toString());
            result &= updateTripleStore(obj, bloomfilterMapping, modelRepoUrl, modelContext, utilObj);
            Map<String, String> verification = new HashMap<String, String>();
            Set<String> triplemaps = new HashSet<String>(Arrays.asList(obj.getString("ids").split(",")));
            verification.putAll(utilObj.getBloomFiltersForMaps(modelRepoUrl, modelContext, triplemaps));
            boolean verify = true;
            for (Entry<String, String> entry : verification.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                KR2RMLBloomFilter bf2 = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                        KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
                KR2RMLBloomFilter bf = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                        KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
                bf2.populateFromCompressedAndBase64EncodedString(value);
                bf.populateFromCompressedAndBase64EncodedString(obj.getString(key));
                bf2.and(bf);
                bf2.xor(bf);
                try {
                    Field f1 = BloomFilter.class.getDeclaredField("bits");
                    f1.setAccessible(true);
                    BitSet bits = (BitSet) f1.get(bf2);
                    if (bits.cardinality() != 0) {
                        verify = false;
                        break;
                    }
                } catch (Exception e) {
                }
            }
            if (!verify) {
                result &= updateTripleStore(obj, verification, modelRepoUrl, modelContext, utilObj);
            }
            long end = System.currentTimeMillis();
            System.out
                    .println("execution time: " + (end - start) + " node total: " + bloomfilterMapping.size());
        }
    } catch (Exception e1) {
        logger.error("Error occured while generating RDF!", e1);
        return new UpdateContainer(new ErrorUpdate("Error occured while generating RDF: " + e1.getMessage()));
    }
    try {
        // Get the graph name from properties if empty graph uri
        // String graphName = worksheet.getMetadataContainer().getWorksheetProperties()
        //         .getPropertyValue(Property.graphName);
        // if (this.graphUri == null || this.graphUri.isEmpty()) {
        //     // Set to default
        //     worksheet.getMetadataContainer().getWorksheetProperties().setPropertyValue(
        //             Property.graphName, WorksheetProperties.createDefaultGraphName(worksheet.getTitle()));
        //     this.graphUri = WorksheetProperties.createDefaultGraphName(worksheet.getTitle());
        // }
        if (tripleStoreUrl == null || tripleStoreUrl.isEmpty()) {
            tripleStoreUrl = TripleStoreUtil.defaultDataRepoUrl;
        }
        logger.info("tripleStoreURl : " + tripleStoreUrl);
        result &= utilObj.saveToStore(rdfFileLocalPath, tripleStoreUrl, this.graphUri, this.replaceContext,
                this.rdfSourceNamespace);
        if (url != null && !url.isEmpty() && url.compareTo("") != 0
                && utilObj.testURIExists(modelRepoUrl, "", url)) {
            StringBuilder sb = new StringBuilder();
            url = url.trim();
            if (!url.startsWith("<")) {
                sb.append("<");
            }
            sb.append(url);
            if (!url.endsWith(">")) {
                sb.append(">");
            }
            sb.append(" <");
            sb.append(Uris.MODEL_HAS_DATA_URI);
            sb.append("> \"true\" .\n");
            String input = sb.toString();
            result &= utilObj.saveToStore(input, modelRepoUrl, modelContext, new Boolean(false),
                    this.rdfSourceNamespace);
        }
        if (result) {
            logger.info("Saved rdf to store");
        } else {
            logger.error("Falied to store rdf to karma_data store");
            return new UpdateContainer(new ErrorUpdate("Error: Failed to store RDF to the triple store"));
        }
    } catch (Exception e) {
        logger.error(e.getMessage());
        e.printStackTrace();
        return new UpdateContainer(new ErrorUpdate("Error occured while generating RDF: " + e.getMessage()));
    }
    try {
        return new UpdateContainer(new AbstractUpdate() {
            public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
                JSONObject outputObject = new JSONObject();
                try {
                    outputObject.put(PublishRDFCommandJsonKeys.updateType.name(), "PublishRDFUpdate");
                    outputObject.put(PublishRDFCommandJsonKeys.fileUrl.name(), ServletContextParameterMap
                            .getParameterValue(ContextParameter.RDF_PUBLISH_RELATIVE_DIR) + rdfFileName);
                    outputObject.put(PublishRDFCommandJsonKeys.worksheetId.name(), worksheetId);
                    outputObject.put(PublishRDFCommandJsonKeys.errorReport.name(), errorReport.toJSONString());
                    pw.println(outputObject.toString(4));
                } catch (JSONException e) {
                    logger.error("Error occured while generating JSON!");
                }
            }
        });
    } catch (Exception e) {
        return new UpdateContainer(new ErrorUpdate(e.getMessage()));
    }
}
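A side note on the verification loop above: and() followed by xor() against the locally built filter leaves a non-zero bit set exactly when the local filter has bits the stored filter lacks, i.e. when the stored filter is not a superset. The standalone sketch below re-creates that subset check with plain Hadoop BloomFilters; the sizing (1024, 5) is illustrative, and the reflection on the package-private bits field mirrors the trick above and may break on other Hadoop versions.

    import java.lang.reflect.Field;
    import java.util.BitSet;
    import org.apache.hadoop.util.bloom.BloomFilter;
    import org.apache.hadoop.util.bloom.Key;
    import org.apache.hadoop.util.hash.Hash;

    public class BloomSubsetCheckSketch {
        // Returns true if every bit of 'local' is already set in 'stored'.
        static boolean isSubset(BloomFilter stored, BloomFilter local) throws Exception {
            BloomFilter scratch = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            scratch.or(stored);
            scratch.and(local);  // bits present in both filters
            scratch.xor(local);  // leftover bits = bits only present in 'local'
            Field bitsField = BloomFilter.class.getDeclaredField("bits");
            bitsField.setAccessible(true);
            BitSet bits = (BitSet) bitsField.get(scratch);
            return bits.cardinality() == 0;
        }

        public static void main(String[] args) throws Exception {
            BloomFilter stored = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            BloomFilter local = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            stored.add(new Key("a".getBytes()));
            local.add(new Key("a".getBytes()));
            System.out.println(isSubset(stored, local)); // true
            local.add(new Key("b".getBytes()));
            System.out.println(isSubset(stored, local)); // false (with high probability)
        }
    }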
From source file: edu.isi.karma.rdf.bloom.TestJSONRDFGeneratorWithBloomFilters.java
License: Apache License
/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}.
 */
@Test
public void testGenerateRDF1() {
    try {
        KR2RMLBloomFilterManager peopleBloomFilterManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleBloomFilterManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");
        KR2RMLBloomFilter peoplePersonWithTwitterIdBF = peopleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_5fcf2d39-f62b-4cdd-863e-bde21493e1bd");
        Key k = new Key(("<http://lod.isi.edu/cs548/person/Slepicka>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Taheriyan>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Kozareva>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Ambite>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Knoblock>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Wu>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        assertEquals(3, peoplePersonWithTwitterIdBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter schedulePersonBF = scheduleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        k = new Key(("<http://lod.isi.edu/cs548/person/Slepicka>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Taheriyan>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Ambite>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Knoblock>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        assertEquals(5, schedulePersonBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter intersectionBF = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        intersectionBF.or(peoplePersonWithTwitterIdBF);
        intersectionBF.and(schedulePersonBF);
        assertEquals(3, intersectionBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter hasInstructorBF = scheduleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(hasInstructorBF.membershipTest(k));
        intersectionBF = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        intersectionBF.or(hasInstructorBF);
        intersectionBF.and(peoplePersonWithTwitterIdBF);
        assertEquals(3, intersectionBF.estimateNumberOfHashedValues());
    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}
From source file: edu.isi.karma.rdf.TestJSONRDFGeneratorWithBloomFilters.java
License: Apache License
/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}.
 */
@Test
public void testGenerateRDF1() {
    try {
        KR2RMLBloomFilterManager peopleBloomFilterManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleBloomFilterManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");
        KR2RMLBloomFilter peoplePersonWithTwitterIdBF = peopleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_1941470a-1dfb-4716-803b-5f07a4af90fd");
        Key k = new Key(("<http://lod.isi.edu/cs548/person/Slepicka>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Taheriyan>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Kozareva>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Ambite>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Knoblock>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Wu>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        assertEquals(3, peoplePersonWithTwitterIdBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter schedulePersonBF = scheduleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        k = new Key(("<http://lod.isi.edu/cs548/person/Slepicka>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Taheriyan>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Ambite>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Knoblock>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        assertEquals(5, schedulePersonBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter intersectionBF = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        intersectionBF.or(peoplePersonWithTwitterIdBF);
        intersectionBF.and(schedulePersonBF);
        assertEquals(3, intersectionBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter hasInstructorBF = scheduleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(hasInstructorBF.membershipTest(k));
        intersectionBF = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        intersectionBF.or(hasInstructorBF);
        intersectionBF.and(peoplePersonWithTwitterIdBF);
        assertEquals(3, intersectionBF.estimateNumberOfHashedValues());
    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}
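The intersection idiom in the two tests above (seed an empty filter, or() in one source, then and() with the other) also works with the plain Hadoop BloomFilter that KR2RMLBloomFilter extends; estimateNumberOfHashedValues, however, is a Karma-specific extension, so the sketch below checks membership instead. The sizing (1024, 5) and keys are illustrative assumptions.

    import org.apache.hadoop.util.bloom.BloomFilter;
    import org.apache.hadoop.util.bloom.Key;
    import org.apache.hadoop.util.hash.Hash;

    public class BloomIntersectionSketch {
        public static void main(String[] args) {
            // Both filters must share vectorSize, nbHash, and hash type for or()/and() to make sense.
            BloomFilter people = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            BloomFilter schedule = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            people.add(new Key("Szekely".getBytes()));
            schedule.add(new Key("Szekely".getBytes()));
            schedule.add(new Key("Slepicka".getBytes()));

            // Approximate intersection: start empty, fold in one filter, then AND with the other.
            BloomFilter intersection = new BloomFilter(1024, 5, Hash.JENKINS_HASH);
            intersection.or(people);
            intersection.and(schedule);
            System.out.println(intersection.membershipTest(new Key("Szekely".getBytes())));  // true
            System.out.println(intersection.membershipTest(new Key("Slepicka".getBytes()))); // false (with high probability)
        }
    }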
From source file: org.apache.accumulo.core.bloomfilter.Filter.java
License: Open Source License
@Override
public void readFields(final DataInput in) throws IOException {
    final int ver = in.readInt();
    rVersion = ver;
    if (ver > 0) { // old unversioned format
        this.nbHash = ver;
        this.hashType = Hash.JENKINS_HASH;
    } else if (ver == VERSION | ver == VERSION + 1) { // support for directly serializing the bitset
        this.nbHash = in.readInt();
        this.hashType = in.readByte();
    } else {
        throw new IOException("Unsupported version: " + ver);
    }
    this.vectorSize = in.readInt();
    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
From source file: org.apache.giraph.utils.bloom.Filter.java
License: Open Source License
public void readFields(DataInput in) throws IOException {
    int ver = in.readInt();
    if (ver > 0) { // old unversioned format
        this.nbHash = ver;
        this.hashType = Hash.JENKINS_HASH;
    } else if (ver == VERSION) {
        this.nbHash = in.readInt();
        this.hashType = in.readByte();
    } else {
        throw new IOException("Unsupported version: " + ver);
    }
    this.vectorSize = in.readInt();
    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
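For context on the version check in the two readFields implementations above: the newer serialization format writes a negative VERSION marker first, whereas the old unversioned format started directly with the positive hash-function count, which is why a positive leading int implies Hash.JENKINS_HASH as the hash type. The sketch below shows the matching write side as found in Hadoop's own Filter class; the exact body may differ slightly in the Accumulo and Giraph copies.

    public void write(DataOutput out) throws IOException {
        out.writeInt(VERSION);        // negative marker distinguishes the versioned format
        out.writeInt(this.nbHash);    // number of hash functions
        out.writeByte(this.hashType); // e.g. Hash.JENKINS_HASH or Hash.MURMUR_HASH
        out.writeInt(this.vectorSize);
    }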
From source file: org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilterTest.java
License: Apache License
private static Filter getFilter(String[] tokens) throws IOException {
    Filter filter = new BloomFilter(100, 50, Hash.JENKINS_HASH);
    Key k = new Key();
    for (String s : tokens) {
        setKey(k, s);
        filter.add(k);
    }
    return filter;
}
From source file: org.apache.pig.builtin.BuildBloomBase.java
License: Apache License
private int convertHashType(String hashType) {
    if (hashType.toLowerCase().contains("jenkins")) {
        return Hash.JENKINS_HASH;
    } else if (hashType.toLowerCase().contains("murmur")) {
        return Hash.MURMUR_HASH;
    } else {
        throw new RuntimeException("Unknown hash type " + hashType + ". Valid values are jenkins and murmur.");
    }
}
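Hadoop's Hash class offers a string-to-constant lookup of its own, Hash.parseHashType, which the sketch below exercises; unlike the contains()-based check above, it expects the exact names "jenkins" or "murmur" and returns Hash.INVALID_HASH otherwise. This is a separate illustration, not part of the Pig source.

    import org.apache.hadoop.util.hash.Hash;

    public class ParseHashTypeSketch {
        public static void main(String[] args) {
            int jenkins = Hash.parseHashType("jenkins"); // Hash.JENKINS_HASH
            int murmur = Hash.parseHashType("murmur");   // Hash.MURMUR_HASH
            int invalid = Hash.parseHashType("sha1");    // Hash.INVALID_HASH
            System.out.println(jenkins + " " + murmur + " " + invalid);
        }
    }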