Usage examples for org.apache.lucene.index.IndexableField#fieldType()
public IndexableFieldType fieldType();
From source file:com.doculibre.constellio.services.ImportExportServicesImpl.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/*from www .j a v a2 s.co m*/ public void importData(Directory directory, RecordCollection collection, ProgressInfo progressInfo) { try { ConnectorInstance connectorInstance = collection.getConnectorInstances().iterator().next(); RecordServices recordServices = ConstellioSpringUtils.getRecordServices(); String uniqueKeyMetaName = null; IndexField uniqueKeyIndexField = collection.getUniqueKeyIndexField(); for (ConnectorInstanceMeta connectorInstanceMeta : uniqueKeyIndexField.getConnectorInstanceMetas()) { if (connectorInstance.equals(connectorInstanceMeta.getConnectorInstance())) { uniqueKeyMetaName = connectorInstanceMeta.getName(); break; } } Pattern invalidDatePattern = Pattern .compile("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]*)?$"); IndexReader indexReader = DirectoryReader.open(directory); if (progressInfo != null) { progressInfo.setTotal(indexReader.numDocs()); } for (int i = 0; i < indexReader.numDocs(); i++) { Document document = indexReader.document(i); Record record = new Record(); record.setLastModified(new Date()); record.setConnectorInstance(connectorInstance); for (IndexableField field : document.getFields()) { // for (String fieldName : (Collection<String>) // indexReader.getFieldNames(FieldOption.ALL)) { if (field != null && field.fieldType().stored() && field.binaryValue() == null) { String metaName = field.name(); String metaContent = field.stringValue(); Matcher invalidDateMatcher = invalidDatePattern.matcher(metaContent); if (invalidDateMatcher.matches()) { metaContent = metaContent + "Z"; } if (uniqueKeyMetaName.equals(metaName)) { record.setUrl(metaContent); } RecordMeta meta = new RecordMeta(); ConnectorInstanceMeta connectorInstanceMeta = connectorInstance.getOrCreateMeta(metaName); meta.setConnectorInstanceMeta(connectorInstanceMeta); meta.setRecord(record); meta.setContent(metaContent); record.addContentMeta(meta); } } try { recordServices.makePersistent(record); // if (i % 500 
== 0) { // EntityManager entityManager = // ConstellioPersistenceContext.getCurrentEntityManager(); // entityManager.getTransaction().commit(); // entityManager.getTransaction().begin(); // } } catch (Exception e) { e.printStackTrace(); } if (progressInfo != null) { progressInfo.setCurrentIndex(i); } } indexReader.close(); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:com.doculibre.constellio.services.ImportExportServicesImpl.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/*from w ww . j a v a 2 s . c o m*/ public void convertData(Directory directory, OutputStream output) { try { WritableWorkbook workbook = Workbook.createWorkbook(output); WritableSheet sheet = workbook.createSheet("fields", 0); WritableFont arial10font = new WritableFont(WritableFont.ARIAL, 10); WritableCellFormat arial10format = new WritableCellFormat(arial10font); IndexReader indexReader = DirectoryReader.open(directory); // { // int column = 0; // for (String fieldName : (Collection<String>) indexReader.getFieldNames()) { // Label label = new Label(column, 0, fieldName, arial10format); // sheet.addCell(label); // column++; // } // } int row = 1; for (int i = 0; i < indexReader.numDocs() /* && i != 502 */; i++) { Document document = indexReader.document(i); int column = 0; for (IndexableField field : document.getFields()) { if (row == 1) { Label label = new Label(column, 0, field.name(), arial10format); sheet.addCell(label); } if (field != null && field.fieldType().stored() && field.binaryValue() == null) { String indexedContent = field.stringValue(); indexedContent = convertText(indexedContent); Label label = new Label(column, row, indexedContent, arial10format); sheet.addCell(label); } column++; } row++; // if (i == 502) { // break; // } } indexReader.close(); workbook.write(); workbook.close(); } catch (IOException e) { throw new RuntimeException(e); } catch (RowsExceededException e) { throw new RuntimeException(e); } catch (WriteException e) { throw new RuntimeException(e); } }
From source file:com.github.hotware.lucene.extension.bean.test.BeanConverterTest.java
License:BEER-WARE LICENSE
/**
 * Round-trip test: populates every public field of TestBean via reflection with a
 * deterministic sample value, converts the bean to a Lucene Document after each
 * assignment and verifies the field landed in the document as its annotation
 * dictates, then converts the final document back to a bean and checks equality.
 *
 * @throws IllegalArgumentException if a reflective set receives an incompatible value
 * @throws IllegalAccessException   if a field is not accessible via reflection
 */
public void testBeanDocumentConversionViceVersa() throws IllegalArgumentException, IllegalAccessException {
    BeanConverter converter = new BeanConverterImpl(new BeanInformationCacheImpl());
    Field[] fields = TestBean.class.getFields();
    TestBean testBean = new TestBean();
    for (Field field : fields) {
        String fieldName = field.getName();
        Class<?> type = field.getType();
        // Assign a type-appropriate extreme/sample value to the current field.
        if (type.equals(int.class)) {
            field.setInt(testBean, Integer.MAX_VALUE);
        } else if (type.equals(long.class)) {
            field.setLong(testBean, Long.MAX_VALUE);
        } else if (type.equals(double.class)) {
            field.setDouble(testBean, Double.MAX_VALUE);
        } else if (type.equals(float.class)) {
            field.setFloat(testBean, Float.MAX_VALUE);
        } else if (type.equals(boolean.class)) {
            field.setBoolean(testBean, true);
        } else if (type.equals(Integer.class)) {
            field.set(testBean, Integer.MAX_VALUE);
        } else if (type.equals(Long.class)) {
            field.set(testBean, Long.MAX_VALUE);
        } else if (type.equals(Double.class)) {
            field.set(testBean, Double.MAX_VALUE);
        } else if (type.equals(Float.class)) {
            field.set(testBean, Float.MAX_VALUE);
        } else if (type.equals(Boolean.class)) {
            field.set(testBean, true);
        } else if (type.equals(String.class)) {
            field.set(testBean, "Test");
        } else if (fieldName.equals("emptySetTest")) {
            // Checked by NAME before the generic Set branch so this field stays empty.
            field.set(testBean, new HashSet<String>());
        } else if (type.equals(Set.class)) {
            Set<String> set = new HashSet<String>();
            set.add("1");
            set.add("2");
            set.add("3");
            field.set(testBean, set);
        } else if (type.equals(List.class)) {
            List<String> list = new ArrayList<String>();
            list.add("1");
            list.add("2");
            list.add("3");
            field.set(testBean, list);
        } else if (type.equals(Object.class)) {
            field.set(testBean, new Date());
        } else {
            fail("type is not handled in the Unit-Test, please add " + type);
        }
        // Convert after each assignment; loop-scoped, distinct from the final
        // 'document' declared after the loop.
        Document document = converter.beanToDocument(testBean);
        // check if all values are stored the same way they were entered
        if (fieldName.equals("serializeTest")) {
            System.out.println("doing serialize test.");
            assertTrue(Arrays.equals(toSerializedLuceneValue(field.get(testBean)),
                    document.getBinaryValue(fieldName).bytes));
        } else if (fieldName.equals("customNameTest")) {
            System.out.println("doing custom name equality test.");
            String originalValue = (String) field.get(testBean);
            String documentValue = document.get("customName");
            assertEquals(originalValue, documentValue);
        } else if (fieldName.equals("notAnnotatedTest")) {
            System.out.println("doing not annotated test.");
            // Not annotated, so it must not appear in the document at all.
            assertEquals(null, document.get(fieldName));
        } else if (fieldName.equals("listTest")) {
            System.out.println("doing listTest");
            @SuppressWarnings("unchecked")
            List<String> originalList = (List<String>) field.get(testBean);
            IndexableField[] documentFields = document.getFields(fieldName);
            // Lists must preserve element order in the multi-valued field.
            for (int i = 0; i < originalList.size(); ++i) {
                assertEquals(originalList.get(i), documentFields[i].stringValue());
            }
        } else if (fieldName.equals("setTest")) {
            // NOTE(review): message says "listTest" but this is the set branch —
            // runtime string kept as-is.
            System.out.println("doing listTest");
            @SuppressWarnings("unchecked")
            Set<String> originalSet = (Set<String>) field.get(testBean);
            Set<String> docSet = new HashSet<String>();
            for (IndexableField documentField : document.getFields(fieldName)) {
                docSet.add(documentField.stringValue());
            }
            assertEquals(originalSet, docSet);
        } else if (fieldName.equals("emptySetTest")) {
            System.out.println("doing emptySetTest");
            // Empty collections produce no field in the document.
            assertEquals(null, document.get(fieldName));
        } else if (fieldName.equals("multiTest")) {
            System.out.println("doing multiTest");
            assertEquals("multiTest", document.get(fieldName));
        } else if (fieldName.equals("multiMultiTest")) {
            // NOTE(review): typo "doint" is in a runtime string, kept as-is.
            System.out.println("doint multiMultiTest");
            assertEquals("multiMultiTest", document.get(fieldName));
        } else {
            // normally a check is needed, but in the test-case we
            // can do this without checking for a present annotation
            BeanField bf = field.getAnnotation(BeanField.class);
            System.out.println("doing " + bf.type() + " tests on \"" + fieldName + "\".");
            assertEquals(field.get(testBean).toString(), document.get(fieldName));
            // The Lucene field type flags must reflect the @BeanField settings.
            IndexableField indexField = document.getField(fieldName);
            IndexableFieldType indexFieldType = indexField.fieldType();
            assertEquals(bf.store(), indexFieldType.stored());
            assertEquals(bf.index(), indexFieldType.indexed());
            assertEquals(bf.tokenized(), indexFieldType.tokenized());
            // TODO: test if fieldType is correct?
        }
    }
    // now that all the conversion works we can safely generate
    // a document with that and work backwards :)
    System.out.println("doing reverse conversion (document to bean) test.");
    Document document = converter.beanToDocument(testBean);
    TestBean reverseBean = converter.documentToBean(TestBean.class, document);
    // setting the stuff that can not be in the document and therefore not
    // in the reverseBean
    reverseBean.notAnnotatedTest = testBean.notAnnotatedTest;
    reverseBean.notStoredButIndexedTest = testBean.notStoredButIndexedTest;
    assertTrue(testBean.equals(reverseBean));
    System.out.println("Result: conversion test successfull.");
}
From source file:com.github.rnewson.couchdb.lucene.DatabaseIndexer.java
License:Apache License
/**
 * Handles an HTTP search request against this database's index.
 *
 * Parses one or more query strings from the request and, for each, either reports
 * the rewritten query plus per-term document frequencies (rewrite=true) or executes
 * the search and collects result rows — stored fields, optional highlights, scores,
 * sort order, optionally the full CouchDB documents — into a JSON response.
 * Supports ETag-based caching (304) and JSONP via the "callback" parameter.
 *
 * @param req  the servlet request carrying q, limit, skip, sort, etc.
 * @param resp the servlet response receiving the JSON body
 * @throws IOException   on index or response I/O failure
 * @throws JSONException on JSON assembly failure
 */
public void search(final HttpServletRequest req, final HttpServletResponse resp)
        throws IOException, JSONException {
    final IndexState state = getState(req, resp);
    if (state == null)
        return;
    final IndexSearcher searcher = state.borrowSearcher(isStaleOk(req));
    final String etag = state.getEtag();
    final FastVectorHighlighter fvh = new FastVectorHighlighter(true, true);
    final JSONArray result = new JSONArray();
    try {
        if (state.notModified(req)) {
            // Client's cached copy is current; short-circuit with 304 Not Modified.
            resp.setStatus(304);
            return;
        }
        for (final String queryString : getQueryStrings(req)) {
            final Analyzer analyzer = state.analyzer(req.getParameter("analyzer"));
            final Operator operator = "and".equalsIgnoreCase(req.getParameter("default_operator"))
                    ? Operator.AND
                    : Operator.OR;
            final Query q = state.parse(queryString, operator, analyzer);
            final JSONObject queryRow = new JSONObject();
            queryRow.put("q", q.toString());
            if (getBooleanParameter(req, "debug")) {
                queryRow.put("plan", QueryPlan.toPlan(q));
                queryRow.put("analyzer", analyzer.getClass());
            }
            queryRow.put("etag", etag);
            if (getBooleanParameter(req, "rewrite")) {
                // Diagnostic mode: expand the query and report each term's docFreq
                // instead of running the search.
                final Query rewritten_q = q.rewrite(searcher.getIndexReader());
                queryRow.put("rewritten_q", rewritten_q.toString());
                final JSONObject freqs = new JSONObject();
                final Set<Term> terms = new HashSet<Term>();
                rewritten_q.extractTerms(terms);
                for (final Object term : terms) {
                    final int freq = searcher.getIndexReader().docFreq((Term) term);
                    freqs.put(term.toString(), freq);
                }
                queryRow.put("freqs", freqs);
            } else {
                // Perform the search.
                final TopDocs td;
                final StopWatch stopWatch = new StopWatch();
                final boolean include_docs = getBooleanParameter(req, "include_docs");
                final int highlights = getIntParameter(req, "highlights", 0);
                // min for fast term vector highlighter is 18
                final int highlight_length = max(getIntParameter(req, "highlight_length", 18), 18);
                final boolean include_termvectors = getBooleanParameter(req, "include_termvectors");
                final int limit = getIntParameter(req, "limit", ini.getInt("lucene.limit", 25));
                final Sort sort = CustomQueryParser.toSort(req.getParameter("sort"));
                final int skip = getIntParameter(req, "skip", 0);
                // Optional whitelist of stored fields to load per hit; null = load all.
                final Set<String> fieldsToLoad;
                if (req.getParameter("include_fields") == null) {
                    fieldsToLoad = null;
                } else {
                    final String[] fields = Utils.splitOnCommas(req.getParameter("include_fields"));
                    final List<String> list = Arrays.asList(fields);
                    fieldsToLoad = new HashSet<String>(list);
                }
                if (sort == null) {
                    td = searcher.search(q, null, skip + limit);
                } else {
                    td = searcher.search(q, null, skip + limit, sort);
                }
                stopWatch.lap("search");
                // Fetch matches (if any). Clamp to [0, limit] after skipping.
                final int max = Math.max(0, Math.min(td.totalHits - skip, limit));
                final JSONArray rows = new JSONArray();
                final String[] fetch_ids = new String[max];
                for (int i = skip; i < skip + max; i++) {
                    final Document doc;
                    if (fieldsToLoad == null) {
                        doc = searcher.doc(td.scoreDocs[i].doc);
                    } else {
                        doc = searcher.doc(td.scoreDocs[i].doc, fieldsToLoad);
                    }
                    final JSONObject row = new JSONObject();
                    final JSONObject fields = new JSONObject();
                    final JSONObject highlight_rows = new JSONObject();
                    // Include stored fields.
                    for (final IndexableField f : doc.getFields()) {
                        if (!f.fieldType().stored()) {
                            // Only stored fields can be returned to the client.
                            continue;
                        }
                        final String name = f.name();
                        final Object value;
                        if (f.numericValue() != null) {
                            value = f.numericValue();
                        } else {
                            value = f.stringValue();
                        }
                        if (value != null) {
                            if ("_id".equals(name)) {
                                row.put("id", value);
                            } else {
                                if (!fields.has(name)) {
                                    fields.put(name, value);
                                } else {
                                    // Repeated field name: promote scalar to array,
                                    // or append to the existing array.
                                    final Object obj = fields.get(name);
                                    if (obj instanceof String || obj instanceof Number) {
                                        final JSONArray arr = new JSONArray();
                                        arr.put(obj);
                                        arr.put(value);
                                        fields.put(name, arr);
                                    } else {
                                        assert obj instanceof JSONArray;
                                        ((JSONArray) obj).put(value);
                                    }
                                }
                                if (highlights > 0) {
                                    String[] frags = fvh.getBestFragments(fvh.getFieldQuery(q),
                                            searcher.getIndexReader(), td.scoreDocs[i].doc, name,
                                            highlight_length, highlights);
                                    highlight_rows.put(name, frags);
                                }
                            }
                        }
                    }
                    if (!Float.isNaN(td.scoreDocs[i].score)) {
                        row.put("score", td.scoreDocs[i].score);
                    }
                    // Include sort order (if any).
                    if (td instanceof TopFieldDocs) {
                        final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i];
                        row.put("sort_order", fd.fields);
                    }
                    // Fetch document (if requested).
                    if (include_docs) {
                        fetch_ids[i - skip] = doc.get("_id");
                    }
                    if (fields.length() > 0) {
                        row.put("fields", fields);
                    }
                    if (highlight_rows.length() > 0) {
                        row.put("highlights", highlight_rows);
                    }
                    rows.put(row);
                }
                // Fetch documents (if requested).
                if (include_docs && fetch_ids.length > 0) {
                    final List<CouchDocument> fetched_docs = database.getDocuments(fetch_ids);
                    for (int j = 0; j < max; j++) {
                        final CouchDocument doc = fetched_docs.get(j);
                        final JSONObject row = doc == null ? new JSONObject("{\"error\":\"not_found\"}")
                                : doc.asJson();
                        rows.getJSONObject(j).put("doc", row);
                    }
                }
                stopWatch.lap("fetch");
                queryRow.put("skip", skip);
                queryRow.put("limit", limit);
                queryRow.put("total_rows", td.totalHits);
                queryRow.put("search_duration", stopWatch.getElapsed("search"));
                queryRow.put("fetch_duration", stopWatch.getElapsed("fetch"));
                // Include sort info (if requested).
                if (td instanceof TopFieldDocs) {
                    queryRow.put("sort_order", CustomQueryParser.toJSON(((TopFieldDocs) td).fields));
                }
                queryRow.put("rows", rows);
            }
            result.put(queryRow);
        }
    } catch (final ParseException e) {
        ServletUtils.sendJsonError(req, resp, 400, "Bad query syntax: " + e.getMessage());
        return;
    } finally {
        // Always return the searcher to the pool, even on error paths.
        state.returnSearcher(searcher);
    }
    resp.setHeader("ETag", etag);
    resp.setHeader("Cache-Control", "must-revalidate");
    ServletUtils.setResponseContentTypeAndEncoding(req, resp);
    // Single query -> bare object; multiple queries -> array of per-query objects.
    final Object json = result.length() > 1 ? result : result.getJSONObject(0);
    final String callback = req.getParameter("callback");
    final String body;
    if (callback != null) {
        // JSONP: wrap the payload in the caller-supplied function name.
        body = String.format("%s(%s)", callback, json);
    } else {
        if (json instanceof JSONObject) {
            final JSONObject obj = (JSONObject) json;
            body = getBooleanParameter(req, "debug") ? obj.toString(2) : obj.toString();
        } else {
            final JSONArray arr = (JSONArray) json;
            body = getBooleanParameter(req, "debug") ? arr.toString(2) : arr.toString();
        }
    }
    final Writer writer = resp.getWriter();
    try {
        writer.write(body);
    } finally {
        writer.close();
    }
}
From source file:dk.dbc.opensearch.fedora.search.WriteAheadLogTest.java
License:Open Source License
/**
 * Verifies that WriteAheadLog.initialize() replays BOTH leftover files from a
 * crashed run — the "committing" file and the main log file — so that deletions,
 * additions and updates recorded in them are all reflected in the index.
 */
@Test
public void testInitializeRecoversUncomittedFiles() throws Exception {
    File comitting = new File(folder.getRoot(), "writeaheadlog.committing");
    File writeaheadlog = new File(folder.getRoot(), "writeaheadlog.log");
    RandomAccessFile comittingRaf = new RandomAccessFile(comitting, "rwd");
    RandomAccessFile writeaheadlogRaf = new RandomAccessFile(writeaheadlog, "rwd");

    String pid1 = "obj:1";
    Document doc1 = makeLuceneDocument(pid1);
    String pid2 = "obj:2";
    Document doc2a = makeLuceneDocument(pid2, new Pair<String, String>("field", "value1"));
    Document doc2b = makeLuceneDocument(pid2, new Pair<String, String>("field", "value2"));
    String pid3 = "obj:3";
    Document doc3 = makeLuceneDocument(pid3);

    // Given a writer with one document
    writer.updateDocument(WriteAheadLog.getPidTerm(pid1), doc1);
    writer.commit();

    // And a comitting file with that document deleted and a new document added
    // (a null document records a deletion).
    WriteAheadLog.writeDocumentData(comittingRaf, pid1, null);
    WriteAheadLog.writeDocumentData(comittingRaf, pid2, doc2a);

    // And a log file with one new document and one updated document
    WriteAheadLog.writeDocumentData(writeaheadlogRaf, pid2, doc2b);
    WriteAheadLog.writeDocumentData(writeaheadlogRaf, pid3, doc3);

    comittingRaf.close();
    writeaheadlogRaf.close();

    // Initialize the WAL to recover the lost files
    WriteAheadLog wal = new WriteAheadLog(writer, folder.getRoot(), 1000, true);
    int recovered = wal.initialize();
    // 2 entries from the committing file + 2 from the log file.
    assertEquals(4, recovered);

    // Verify that
    IndexReader reader = DirectoryReader.open(writer, false);
    IndexSearcher searcher = new IndexSearcher(reader);

    // pid1 was deleted by the committing-file entry.
    TopDocs result = searcher.search(new TermQuery(WriteAheadLog.getPidTerm(pid1)), 100);
    assertEquals(0, result.scoreDocs.length);
    System.out.println("");

    // pid2 exists exactly once, with the contents of the LATER update (doc2b).
    result = searcher.search(new TermQuery(WriteAheadLog.getPidTerm(pid2)), 100);
    assertEquals(1, result.scoreDocs.length);
    Document doc2 = reader.document(result.scoreDocs[0].doc);
    // Compare doc2b and the recovered document field by field.
    Iterator<IndexableField> it1 = doc2b.iterator();
    Iterator<IndexableField> it2 = doc2.iterator();
    do {
        IndexableField expected = it1.next();
        IndexableField actual = it2.next();
        assertEquals(expected.fieldType().stored(), actual.fieldType().stored());
        if (!(expected instanceof LongField)) {
            // NOTE(review): LongField is skipped here, presumably because its
            // index-time type details don't round-trip identically — confirm.
            assertEquals(expected.fieldType().indexed(), actual.fieldType().indexed());
            assertEquals(expected.fieldType().omitNorms(), actual.fieldType().omitNorms());
            assertEquals(expected.fieldType().indexOptions(), actual.fieldType().indexOptions());
        }
        assertEquals(expected.name(), actual.name());
        assertEquals(expected.stringValue(), actual.stringValue());
        assertEquals(expected.numericValue(), actual.numericValue());
    } while (it1.hasNext() && it2.hasNext());
    //        assertEquals( doc2b.toString(), doc2.toString() );

    // pid3 was added by the log-file entry.
    result = searcher.search(new TermQuery(WriteAheadLog.getPidTerm(pid3)), 100);
    assertEquals(1, result.scoreDocs.length);
}
From source file:gentest.LuceneTest.java
/** * Testing for the Lucene add tag bug./*from w w w . j ava 2s .co m*/ * * @param dir * @throws IOException */ public static void printAll(FSDirectory dir) throws IOException { try (DirectoryReader r = DirectoryReader.open(dir)) { IndexSearcher search = new IndexSearcher(r); TopScoreDocCollector collect = TopScoreDocCollector.create(10); MatchAllDocsQuery all = new MatchAllDocsQuery(); search.search(all, collect); TopDocs td = collect.topDocs(); if (td.scoreDocs.length == 0) { System.out.println("NONE FOUND"); } else { for (ScoreDoc sd : td.scoreDocs) { Document doc = r.document(sd.doc); for (IndexableField field : doc.getFields()) { System.out.println("Stored: " + field.fieldType().stored()); if (!field.name().equals("content")) { System.out.println(field.name() + "\t" + field.stringValue()); } else { System.out.println(field.name()); } } System.out.println(); } } } }
From source file:gentest.LuceneTest.java
/**
 * Reproduces the Lucene "add tag" bug: re-opens an existing index, finds a document
 * by its path, adds a "tag" field, re-adds "path" as a StringField, updates the
 * document in place, then re-queries and prints the index to verify the update.
 *
 * @throws IOException if the index cannot be opened, read or written
 */
public static void t2() throws IOException {
    FSDirectory dir = FSDirectory.open(FileSystems.getDefault().getPath("_lucene_files_"));
    printAll(dir);
    IndexSearcher search;
    Query q;
    Document doc;
    // NOTE(review): this path uses '.' as a separator throughout — presumably
    // intentional test data matching how documents were indexed; confirm.
    String path = "C:.Users.Michael.Documents.NetBeansProjects.WorkforceResearchGuide.testdocs.Pets.dog.txt";
    String altPath = "doge";
    // Add the tag: reader and writer share one try-with-resources so both are
    // closed (writer commit on close) before the verification pass re-opens.
    try (DirectoryReader reader = DirectoryReader.open(dir);
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
        search = new IndexSearcher(reader);
        q = new TermQuery(new Term("path", path));
        TopDocs docs = search.search(q, 1);
        doc = reader.document(docs.scoreDocs[0].doc);
        doc.add(new TextField("tag", "tag test", Field.Store.YES));
        // Re-add "path" as a StringField (exact, untokenized) so the TermQuery
        // below can still find it; fields read back from the index lose their
        // original field type.
        doc.removeField("path");
        doc.add(new StringField("path", path, Field.Store.YES));
        //            doc.removeField("path");
        //            doc.add(new TextField("path",altPath,Field.Store.YES));
        writer.updateDocument(new Term("path", path), doc);
    }
    // Check for the tag by re-querying the updated index.
    try (DirectoryReader r = DirectoryReader.open(dir)) {
        search = new IndexSearcher(r);
        TopScoreDocCollector collect = TopScoreDocCollector.create(10);
        q = new TermQuery(new Term("path", path));
        MatchAllDocsQuery all = new MatchAllDocsQuery();
        search.search(q, collect);
        TopDocs td = collect.topDocs();
        if (td.scoreDocs.length == 0) {
            System.out.println("NONE FOUND");
        } else {
            for (ScoreDoc sd : td.scoreDocs) {
                doc = r.document(sd.doc);
                for (IndexableField field : doc.getFields()) {
                    System.out.println("Stored: " + field.fieldType().stored());
                    if (!field.name().equals("content")) {
                        System.out.println(field.name() + "\t" + field.stringValue());
                    } else {
                        // "content" can be large; print only its name.
                        System.out.println(field.name());
                    }
                }
                System.out.println();
            }
        }
    }
}
From source file:org.apache.mahout.text.LuceneIndexHelper.java
License:Apache License
/**
 * Sanity check that the named field is present and stored on the first document
 * of the index; otherwise it cannot be extracted from this index at all.
 *
 * @param reader    the index to probe
 * @param fieldName the field expected to be present and stored
 * @throws IOException              if document 0 cannot be read
 * @throws IllegalArgumentException if the field is absent or not stored
 */
public static void fieldShouldExistInIndex(IndexReader reader, String fieldName) throws IOException {
    final IndexableField sample = reader.document(0).getField(fieldName);
    final boolean presentAndStored = sample != null && sample.fieldType().stored();
    if (!presentAndStored) {
        throw new IllegalArgumentException("Field '" + fieldName
                + "' is possibly not stored since first document in index does not contain this field.");
    }
}
From source file:org.apache.maven.index.DefaultIndexerEngine.java
License:Apache License
private Map<String, String> toMap(Document d) { final HashMap<String, String> result = new HashMap<String, String>(); for (Object o : d.getFields()) { IndexableField f = (IndexableField) o; if (f.fieldType().stored()) { result.put(f.name(), f.stringValue()); }//w w w. j av a 2s . co m } return result; }
From source file:org.apache.maven.index.updater.IndexDataWriter.java
License:Apache License
/**
 * Serializes one Lucene document's stored fields into the index data stream.
 *
 * Special documents are intercepted: the descriptor is written at most once, and
 * the ALL_GROUPS / ROOT_GROUPS documents only contribute their group lists to the
 * in-memory accumulators instead of being written.
 *
 * @param document the document to write
 * @return true if the document's stored fields were written, false if it was skipped
 * @throws IOException if writing the fields fails
 */
public boolean writeDocument(final Document document) throws IOException {
    final List<IndexableField> allFields = document.getFields();
    final List<IndexableField> persisted = new ArrayList<>(allFields.size());
    for (final IndexableField candidate : allFields) {
        final String name = candidate.name();
        if (DefaultIndexingContext.FLD_DESCRIPTOR.equals(name)) {
            // Only the first descriptor document is written; later ones are skipped.
            if (descriptorWritten) {
                return false;
            }
            descriptorWritten = true;
        }
        if (ArtifactInfo.ALL_GROUPS.equals(name)) {
            final String groups = document.get(ArtifactInfo.ALL_GROUPS_LIST);
            if (groups != null && groups.trim().length() > 0) {
                allGroups.addAll(ArtifactInfo.str2lst(groups));
            }
            return false;
        }
        if (ArtifactInfo.ROOT_GROUPS.equals(name)) {
            final String groups = document.get(ArtifactInfo.ROOT_GROUPS_LIST);
            if (groups != null && groups.trim().length() > 0) {
                rootGroups.addAll(ArtifactInfo.str2lst(groups));
            }
            return false;
        }
        if (candidate.fieldType().stored()) {
            persisted.add(candidate);
        }
    }
    writeDocumentFields(persisted);
    return true;
}