List of usage examples for org.apache.lucene.index.IndexReader#document

public final Document document(int docID) throws IOException

Returns the stored fields of the n-th Document in this index.
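Before the longer examples below, here is a minimal sketch of the call itself, using the Lucene 4.x-style API that most examples on this page use. The index path "index" and the stored field name "title" are placeholders, not part of any example on this page. The loop walks all document IDs up to maxDoc(), skips deleted documents via the live-docs bitset, and fetches each document's stored fields with document(int).

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class DocumentExample {
    public static void main(String[] args) throws IOException {
        // "index" is a hypothetical path to an existing Lucene index.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
        Bits liveDocs = MultiFields.getLiveDocs(reader); // null if the index has no deletions
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i))
                continue; // skip deleted documents
            Document doc = reader.document(i); // stored fields of the i-th document
            System.out.println(doc.get("title")); // "title" is an assumed stored field
        }
        reader.close();
    }
}

Note that document(int) only returns stored fields; indexed-but-not-stored fields are not reconstructed. The examples below all follow this same pattern of iterating document IDs and checking live docs before calling document(int).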
From source file: net.dataninja.ee.textEngine.MoreLikeThisQuery.java
License: Apache License
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue retrieveTerms(IndexReader indexReader, int docNum, Analyzer analyzer) throws IOException {
    // Gather term frequencies for all fields.
    Map termFreqMap = new HashMap();
    Document d = indexReader.document(docNum);
    for (int i = 0; i < fieldNames.length; i++) {
        String fieldName = fieldNames[i];
        String[] text = d.getValues(fieldName);
        if (text == null)
            continue;
        for (int j = 0; j < text.length; j++) {
            TokenStream tokens = analyzer.tokenStream(fieldName, new StringReader(text[j]));
            addTermFrequencies(tokens, fieldName, termFreqMap);
        } // for j
    } // for i

    // Combine like terms from each field and calculate a score for each.
    Map termScoreMap = condenseTerms(indexReader, termFreqMap);

    // Finally, make a queue by score.
    return createQueue(indexReader, termScoreMap);
}
From source file: net.semanticmetadata.lire.benchmarking.TestHashingIndex.java
License: Open Source License
private void testHashing(Class featureClass, String fieldName)
        throws IOException, InstantiationException, IllegalAccessException {
    String hashesFile = "hashes.obj";
    String hashesFileL = "l_hashes.obj";
    int numResults = 50;
    int maxQueries = 20;
    int queryOffset = 100;
    File file = new File(hashesFile);
    if (file.exists())
        file.delete();
    file = new File(hashesFileL);
    if (file.exists())
        file.delete();
    BitSampling.generateHashFunctions(hashesFile);
    LocalitySensitiveHashing.generateHashFunctions(hashesFileL);
    // HashingIndexor hi = new HashingIndexor();
    ProximityHashingIndexor hi = new ProximityHashingIndexor();
    BitSampling.readHashFunctions(new FileInputStream(hashesFile));
    LocalitySensitiveHashing.readHashFunctions(new FileInputStream(hashesFileL));
    hi.setFeatureClass(featureClass);
    hi.addInputFile(new File(dataSetDataOut));
    hi.setIndexPath(testIndex);
    hi.run();
    System.out.println();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(new File(testIndex)), IOContext.READONCE));
    // generating ground truth for all queries ...
    ImageSearcher groundTruth = new GenericFastImageSearcher(numResults, featureClass, fieldName);
    ArrayList<ImageSearchHits> trueHitsList = new ArrayList<ImageSearchHits>(maxQueries);
    long time = System.currentTimeMillis();
    for (int q = 0; q < maxQueries; q++) {
        trueHitsList.add(q, groundTruth.search(reader.document(q + queryOffset), reader));
    }
    time = System.currentTimeMillis() - time;
    // header
    System.out.println(featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1));
    System.out.println("Number of queries: " + maxQueries);
    System.out.println("Time taken for linear search: " + (time / maxQueries));
    System.out.printf("numFunctionBundles: %d, numBits: %d, w: %2.2f, dimensions: %d\n",
            BitSampling.getNumFunctionBundles(), BitSampling.getBits(), BitSampling.getW(),
            BitSampling.dimensions);
    System.out.println("#hashedResults\ttrue pos.\t#results\tms per search\tprecision");
    for (int j = 100; j <= 3000; j += 100) {
        ImageSearcher hashed = new BitSamplingImageSearcher(numResults, fieldName, fieldName + "_hash",
                (LireFeature) featureClass.newInstance(), new FileInputStream(hashesFile), j);
        long ms = 0;
        long msSum = 0;
        int posSum = 0;
        for (int q = 0; q < maxQueries; q++) {
            ms = System.currentTimeMillis();
            ImageSearchHits hashedHits = hashed.search(reader.document(q + queryOffset), reader);
            assert (hashedHits.length() <= numResults);
            msSum += System.currentTimeMillis() - ms;
            HashSet<Integer> t = new HashSet<Integer>(hashedHits.length());
            HashSet<Integer> h = new HashSet<Integer>(hashedHits.length());
            for (int i = 0; i < trueHitsList.get(q).length(); i++) {
                t.add(((SimpleImageSearchHits) trueHitsList.get(q)).readerID(i));
                h.add(((SimpleImageSearchHits) hashedHits).readerID(i));
            }
            assert (t.size() == h.size());
            int intersect = 0;
            for (Iterator<Integer> iterator = h.iterator(); iterator.hasNext(); ) {
                if (t.contains(iterator.next())) {
                    intersect++;
                }
            }
            posSum += intersect;
        }
        if (j > 1400)
            j += 100;
        double truePositives = ((double) posSum) / ((double) maxQueries);
        System.out.printf("%4d\t%4.1f\t%4d\t%6.1f\t%1.3f\n", j, truePositives, numResults,
                ((double) msSum) / ((double) maxQueries), truePositives / (double) numResults);
        if (posSum / maxQueries == numResults)
            break;
    }
}
From source file: net.semanticmetadata.lire.benchmarking.TestNister.java
License: Open Source License
public void computePrecision(String pathName, Similarity similarity, String label) throws IOException {
    // ImageSearcher vis = new GenericImageSearcher(4, SimpleFeature.class, "featureSURFHistogram");
    // ImageSearcher vis = new GenericFastImageSearcher(4, CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
    // VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity, DocumentBuilder.FIELD_NAME_SIFT_VISUAL_WORDS);
    VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity,
            DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    int queryID, resultID;
    int countSearches = 0, countTruePositives = 0;
    float avgPrecision = 0f;
    Set<Integer> test = StatsUtils.drawSample(100, 10200);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i : test) {
        if (!(reader.hasDeletions() && !liveDocs.get(i))) {
            ImageSearchHits hits = vis.search(reader.document(i), reader);
            String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            s = s.replaceAll("\\D", "");
            queryID = Integer.parseInt(s);
            countTruePositives = 0;
            for (int k = 0; k < hits.length(); k++) {
                String name = hits.doc(k).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                name = name.replaceAll("\\D", "");
                resultID = Integer.parseInt(name);
                if (queryID / 4 == resultID / 4) { // it's a hit.
                    countTruePositives++;
                }
            }
            countSearches++;
            avgPrecision += (float) countTruePositives / 4f;
        }
    }
    avgPrecision = avgPrecision / (float) countSearches;
    FileWriter fw = new FileWriter(new File("precision_results.txt"), true);
    System.out.println(label + " p@4= " + avgPrecision);
    fw.write(label + " p@4= " + avgPrecision + "\n");
    fw.close();
}
From source file: net.semanticmetadata.lire.benchmarking.TestNister.java
License: Open Source License
public void testDocLengthIDF(String pathName) throws IOException {
    df = new double[1024];
    int[] len = new int[10200];
    avgDocLength = 0;
    double numDocs = 0;
    for (int i = 0; i < df.length; i++)
        df[i] = 0;
    for (int i = 0; i < len.length; i++)
        len[i] = 0;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    for (int i = 0; i < reader.numDocs(); i++) {
        String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String f = reader.document(i).getValues("featureSURFHistogram")[0];
        SimpleFeature sf = new SimpleFeature();
        sf.setStringRepresentation(f);
        double[] h = sf.getDoubleHistogram();
        for (int j = 0; j < h.length; j++) {
            if (h[j] > 0.0)
                df[j] += 1; // add to the document frequency
            avgDocLength += h[j];
            len[i] += h[j];
        }
        numDocs += 1;
    }
    // System.out.println("avgDocLength = " + avgDocLength / numDocs);
}
From source file: net.semanticmetadata.lire.benchmarking.TestSimple.java
License: Open Source License
private void doSearch(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    String fileName, fullFileName;
    Document queryDoc;
    ImageSearchHits hits;
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        fullFileName = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        fileName = getIDfromFileName(fullFileName);
        if (allQueries.contains(fileName)) {
            // ok, we've got a query here for a document ...
            queryDoc = reader.document(i);
            hits = searcher.search(queryDoc, reader);
            FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName));
        }
    }
    for (int i = 0; i < outsideQueries.size(); i++) {
        fullFileName = outsideQueries.get(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        fileName = getIDfromFileName(fullFileName);
        if (allQueries.contains(fileName)) {
            // ok, we've got a query here for a document ...
            queryDoc = outsideQueries.get(i);
            hits = searcher.search(queryDoc, reader);
            FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName));
        }
    }
}
From source file: net.semanticmetadata.lire.benchmarking.TestUCID.java
License: Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    PrintWriter fw;
    if (searcher.toString().contains("ImageSearcherUsingWSs")) {
        (new File("eval/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs();
        fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "/" + clusters + "/"
                + prefix.replace(' ', '_') + "-" + db + clusters
                + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt"));
    } else
        fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt"));
    Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(
                reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            String tmpEval = "";
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            double rank = 0;
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                String hitFile = getIDfromFileName(
                        hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y));
                rank++;
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    // TODO: Compute error rate, etc. here.
                    avgPrecision += found / rank;
                    if (rank <= 10)
                        tmpP10++;
                } else { // nothing has been found.
                    if (rank == 1)
                        errorRate += 1d;
                }
            }
            avgPrecision /= (double) (1d + queries.get(fileName).size());
            if (!(found - queries.get(fileName).size() == 1)) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }
            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval);
        }
    }
    for (int i = 0; i < query2id.size(); i++) {
        fw.write(evalText.get(i + 1));
    }
    fw.close();
    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    String s;
    if (searcher.toString().contains("ImageSearcherUsingWSs"))
        s = String.format("%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, map, p10, errorRate,
                searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]);
    else
        s = String.format("%s\t%.4f\t%.4f\t%.4f", prefix, map, p10, errorRate);
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        s += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    System.out.println(s);
}
From source file: net.semanticmetadata.lire.benchmarking.TestUCID.java
License: Open Source License
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
    long ms = System.currentTimeMillis();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(
                reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            ImageSearchHits hits = searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    System.out.printf("%s \t %3.1f \n",
            featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1),
            (double) ms / queryCount);
}
From source file: net.semanticmetadata.lire.benchmarking.TestUniversal.java
License: Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters)
        throws IOException {
    long start = System.currentTimeMillis();
    long timeOfSearch = 0, ms;
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    PrintWriter fw;
    if (searcher.toString().contains("ImageSearcherUsingWSs")) {
        (new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs();
        fw = new PrintWriter(new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/"
                + prefix.replace(' ', '_') + "-" + db + clusters
                + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt"));
    } else {
        (new File("eval/" + db + "/")).mkdirs();
        if (clusters > 0)
            fw = new PrintWriter(
                    new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt"));
        else
            fw = new PrintWriter(
                    new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + "Global.txt"));
    }
    Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(
                reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            String tmpEval = "";
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            ms = System.currentTimeMillis();
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            timeOfSearch += System.currentTimeMillis() - ms;
            double rank = 0;
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                String hitFile = getIDfromFileName(reader.document(hits.documentID(y))
                        .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y));
                rank++;
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    // TODO: Compute error rate, etc. here.
                    avgPrecision += found / rank;
                    if (rank <= 10)
                        tmpP10++;
                } else { // nothing has been found.
                    if (rank == 1)
                        errorRate += 1d;
                }
            }
            avgPrecision /= (double) (1d + queries.get(fileName).size());
            if (!(found - queries.get(fileName).size() == 1)) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }
            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval);
        }
    }
    for (int i = 0; i < query2id.size(); i++) {
        fw.write(evalText.get(i + 1));
    }
    fw.close();
    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    double h = (System.currentTimeMillis() - start) / 3600000.0;
    double m = (h - Math.floor(h)) * 60.0;
    double s = (m - Math.floor(m)) * 60;
    String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""),
            (int) m, (int) s) + " ~ ";
    if (searcher.toString().contains("ImageSearcherUsingWSs"))
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix,
                ((clusters > 0) ? ("\t" + clusters) : ""), map, p10, errorRate,
                searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]);
    else
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, ((clusters > 0) ? ("\t" + clusters) : ""),
                map, p10, errorRate);
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        str += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    h = timeOfSearch / 3600000.0;
    m = (h - Math.floor(h)) * 60.0;
    s = (m - Math.floor(m)) * 60;
    str += " ~ TimeOfsearch: "
            + String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""),
                    (int) m, (int) s);
    System.out.println(str);
}
From source file: net.semanticmetadata.lire.benchmarking.TestUniversal.java
License: Open Source License
private void testSearchSpeed(Class<? extends GlobalFeature> featureClass) throws IOException {
    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensive, true);
    parallelIndexer.addExtractor(featureClass);
    parallelIndexer.run();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass);
    long ms = System.currentTimeMillis();
    String fileName;
    Document queryDoc;
    ImageSearchHits hits;
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            queryDoc = reader.document(i);
            hits = searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    System.out.printf("%s \t %3.1f \n",
            featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1),
            (double) ms / queryCount);
}
From source file: net.semanticmetadata.lire.benchmarking.TestWang.java
License: Open Source License
public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();
    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();
    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document doc = reader.document(i);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0)
            cedd1.setStringRepresentation(cls[0]);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0)
            fcth1.setStringRepresentation(cls[0]);
        // compare against every later document for the pairwise distance distribution
        for (int j = i + 1; j < docs; j++) {
            if (reader.hasDeletions() && !liveDocs.get(j))
                continue; // if it is deleted, just ignore it.
            Document doc2 = reader.document(j);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
            if (cls != null && cls.length > 0)
                cedd2.setStringRepresentation(cls[0]);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
            if (cls != null && cls.length > 0)
                fcth2.setStringRepresentation(cls[0]);
            jcd1.init(cedd1, fcth1);
            jcd2.init(cedd2, fcth2);
            bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";"
                    + jcd1.getDistance(jcd2) + "\n");
        }
        if (i % 100 == 0)
            System.out.println(i + " entries processed ... ");
    }
    bw.close();
}