Usage examples for `org.apache.lucene.document.DateTools#timeToString`.
Method signature: `public static String timeToString(long time, DateTools.Resolution resolution)` — converts a millisecond timestamp into an index-friendly, lexicographically sortable string at the given resolution.
From source file:aos.lucene.tools.ChainedFilterTest.java
License:Apache License
/**
 * Builds an in-memory index of MAX documents for the chained-filter tests.
 * Each document gets a sequential "key", an "owner" of "bob" (first half)
 * or "sue" (second half), and a "date" field at day resolution starting at
 * Feb 1 2009 (Calendar months are 0-based, so month 1 == February) and
 * advancing one day per document. Also prepares the shared query and the
 * date/owner filters used by the tests.
 *
 * NOTE(review): IndexWriter.MaxFieldLength and this IndexWriter constructor
 * are Lucene 3.x APIs, while Version.LUCENE_46 is 4.6 — confirm this compiles
 * against the intended Lucene version.
 */
@Override public void setUp() throws Exception {
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(Version.LUCENE_46),
            IndexWriter.MaxFieldLength.UNLIMITED);
    Calendar cal = Calendar.getInstance();
    cal.set(2009, 1, 1, 0, 0);
    for (int i = 0; i < MAX; i++) {
        Document doc = new Document();
        // 1-based key, stored and indexed verbatim (not tokenized)
        doc.add(new Field("key", "" + (i + 1), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // first half of the docs belong to "bob", second half to "sue"
        doc.add(new Field("owner", (i < MAX / 2) ? "bob" : "sue", Field.Store.YES, Field.Index.NOT_ANALYZED));
        // day-resolution date string sorts lexicographically in chronological order
        doc.add(new Field("date", DateTools.timeToString(cal.getTimeInMillis(), DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
        cal.add(Calendar.DATE, 1);
    }
    writer.close();
    searcher = new IndexSearcher(directory);
    // query matching every document: owner is either bob or sue
    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("owner", "bob")), BooleanClause.Occur.SHOULD);
    bq.add(new TermQuery(new Term("owner", "sue")), BooleanClause.Occur.SHOULD);
    query = bq;
    // date filter: everything strictly before Feb 1 2099, i.e. all test docs
    cal.set(2099, 1, 1, 0, 0);
    dateFilter = TermRangeFilter.Less("date",
            DateTools.timeToString(cal.getTimeInMillis(), DateTools.Resolution.DAY));
    bobFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("owner", "bob"))));
    sueFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("owner", "sue"))));
}
From source file:aplicacion.sistema.indexer.test.FileDocument.java
License:Apache License
/** Makes a document for a File. <p>/*from w w w . j a v a 2 s . c o m*/ The document has three fields: <ul> <li><code>path</code>--containing the pathname of the file, as a stored, untokenized field; <li><code>modified</code>--containing the last modified date of the file as a field as created by <a href="lucene.document.DateTools.html">DateTools</a>; and <li><code>contents</code>--containing the full contents of the file, as a Reader field; */ public static Document Document(File f) throws java.io.FileNotFoundException { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a field that is // indexed (i.e. searchable), but don't tokenize the field into words. doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the last modified date of the file a field named "modified". Use // a field that is indexed (i.e. searchable), but don't tokenize the field // into words. doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in the system's default encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new FileReader(f))); // return the document return doc; }
From source file:busqueda.HTMLDocument.java
License:Apache License
public static String uid(File f) { // Append path and date into a string in such a way that lexicographic // sorting gives the same results as a walk of the file hierarchy. Thus // null (\u0000) is used both to separate directory components and to // separate the path from the date. return f.getPath().replace(dirSep, '\u0000') + "\u0000" + DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND); }
From source file:busqueda.HTMLDocument.java
License:Apache License
/**
 * Builds a Lucene Document for an HTML file: path, modification time,
 * incremental-maintenance uid, tag-stripped contents, summary, and title.
 *
 * @param f the HTML file to index
 * @return the populated document
 * @throws IOException if the file cannot be read
 * @throws InterruptedException if HTML parsing is interrupted
 */
public static Document Document(File f) throws IOException, InterruptedException {
    // make a new, empty document
    Document doc = new Document();
    // "path": URL-style path (dirSep normalized to '/'), stored and indexed
    // as a single untokenized term.
    doc.add(new Field("path", f.getPath().replace(dirSep, '/'), Field.Store.YES, Field.Index.NOT_ANALYZED));
    // "modified": last-modified time at minute resolution; the DateTools
    // encoding sorts lexicographically in chronological order.
    doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
            Field.Store.YES, Field.Index.NOT_ANALYZED));
    // "uid": indexed but not stored, so the index can be incrementally
    // maintained (stale docs located and replaced by uid).
    doc.add(new Field("uid", uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));
    // NOTE(review): fis is never closed here — presumably HTMLParser (or the
    // index writer consuming parser.getReader()) closes the underlying stream
    // after tokenizing; confirm, otherwise this leaks a file handle per call.
    FileInputStream fis = new FileInputStream(f);
    HTMLParser parser = new HTMLParser(fis);
    // "contents": tag-stripped text as a Reader field — tokenized and indexed,
    // never stored.
    doc.add(new Field("contents", parser.getReader()));
    // "summary": stored verbatim for hit display, not indexed.
    doc.add(new Field("summary", parser.getSummary(), Field.Store.YES, Field.Index.NO));
    // "title": both searchable (tokenized) and stored.
    doc.add(new Field("title", parser.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
    // return the document
    return doc;
}
From source file:com.edu.lucene.FileDocument.java
License:Apache License
/**
 * Makes a document for a File.
 * <p>
 * The document has three fields:
 * <ul>
 * <li><code>path</code> — the pathname of the file, as a stored, untokenized field;</li>
 * <li><code>modified</code> — the last-modified date of the file, encoded by
 *     {@code DateTools} at minute resolution; and</li>
 * <li><code>contents</code> — the full contents of the file, as a Reader field.</li>
 * </ul>
 *
 * @param f the file to describe
 * @return the populated document
 * @throws java.io.FileNotFoundException if the file cannot be opened
 */
public static Document Document(File f) throws java.io.FileNotFoundException {
    // make a new, empty document
    Document doc = new Document();
    // "path": indexed (searchable) as a single term — not tokenized — and stored.
    doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    // "modified": minute-resolution timestamp string; sorts lexicographically
    // in chronological order, so it works with term-range queries.
    doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
            Field.Store.YES, Field.Index.NOT_ANALYZED));
    // "contents": a Reader field — tokenized and indexed but not stored.
    // FileReader uses the platform default encoding; if the file is encoded
    // differently, searches for special characters will fail.
    doc.add(new Field("contents", new FileReader(f)));
    // return the document
    return doc;
}
From source file:com.emental.mindraider.core.search.FileDocument.java
License:Apache License
/**
 * Makes a searchable Lucene document for a MindRaider concept file.
 * <p>
 * Fields: <code>uri</code> (untokenized key, used later to delete the doc),
 * <code>path</code> (stored attribute only), <code>conceptLabel</code>
 * (searchable, tokenized), <code>outlineLabel</code> (stored attribute only),
 * <code>modified</code> (second-resolution timestamp attribute), and
 * <code>contents</code> (tokenized annotation text, not stored).
 *
 * @param f             the concept file whose annotation is indexed
 * @param notebookLabel label of the owning notebook/outline (stored only)
 * @param conceptLabel  label of the concept (searchable and stored)
 * @param conceptUri    URI identifying the concept (document key)
 * @return the populated document
 * @throws java.io.FileNotFoundException if the concept file cannot be opened
 */
public static Document Document(File f, String notebookLabel, String conceptLabel, String conceptUri)
        throws java.io.FileNotFoundException {
    // make a new, empty lucene document
    Document doc = new Document();
    // assemble the document from fields - some of them searchable,
    // others only available in the result (stored attributes)
    Field field;
    // concept URI as attribute - used to delete the document
    field = new Field("uri", conceptUri, Field.Store.YES, Field.Index.UN_TOKENIZED);
    doc.add(field);
    // path as attribute (stored, not searchable)
    field = new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO);
    doc.add(field);
    // SEARCHABLE concept label (tokenized and stored)
    field = new Field("conceptLabel", conceptLabel, Field.Store.YES, Field.Index.TOKENIZED);
    doc.add(field);
    // notebook label attribute (stored, not searchable)
    field = new Field("outlineLabel", notebookLabel, Field.Store.YES, Field.Index.NO);
    doc.add(field);
    // timestamp as attribute, second resolution
    field = new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND),
            Field.Store.YES, Field.Index.NO);
    doc.add(field);
    // concept annotation - the most important, searchable content.
    // NOTE(review): the FileInputStream is not closed here — presumably the
    // index writer closes the Reader after consuming the field; confirm,
    // otherwise this leaks a file handle per indexed concept.
    FileInputStream is = new FileInputStream(f);
    Reader reader = new BufferedReader(new InputStreamReader(is));
    field = new Field("contents", reader);
    doc.add(field);
    // return the document
    return doc;
}
From source file:com.esri.gpt.catalog.lucene.QueryProvider.java
License:Apache License
/**
 * Tells whether the query text is a "full date" as stored in the index,
 * i.e. a millisecond-resolution DateTools timestamp.
 *
 * @param queryText possibly a full date
 * @return <code>true</code> if this is a full date
 */
private boolean isFullDate(String queryText) {
    try {
        String candidate = Val.chkStr(queryText);
        // Parse first: a string DateTools cannot parse is rejected via the
        // ParseException path below.
        long parsedMillis = DateTools.stringToTime(candidate);
        if (!candidate.matches("[0-9]+")) {
            return false; // parsable, but not purely numeric
        }
        // A full date must be at least as long as the canonical
        // millisecond-resolution rendering of the parsed instant.
        String fullForm = DateTools.timeToString(parsedMillis, DateTools.Resolution.MILLISECOND);
        return candidate.length() >= fullForm.length();
    } catch (java.text.ParseException ex) {
        return false;
    }
}
From source file:com.esri.gpt.catalog.lucene.TimestampField.java
License:Apache License
/**
 * Converts a timestamp to a String that can be indexed for search.
 * The encoding is DateTools' millisecond-resolution form, which sorts
 * lexicographically in chronological order.
 *
 * @param value the timestamp to convert (may be null)
 * @return the indexable string, or null when the input is null
 */
protected static String timestampToIndexableString(Timestamp value) {
    return (value == null)
            ? null
            : DateTools.timeToString(value.getTime(), DateTools.Resolution.MILLISECOND);
}
From source file:com.esri.gpt.server.assertion.index.AsnSystemPart.java
License:Apache License
/**
 * Appends this assertion's system-part fields to a document for indexing.
 * All fields are stored, indexed untokenized, with no term vectors.
 * Side effects: generates and sets an assertion id if missing, and sets the
 * timestamp to "now" if missing.
 *
 * @param document the document to append fields to
 */
public void appendWritableFields(Document document) {
    Field fld;
    String val;
    long millis;
    // sys.assertionid — generate a random UUID when no id has been assigned yet
    val = Val.chkStr(this.getAssertionId());
    if (val.length() == 0) {
        this.setAssertionId(UUID.randomUUID().toString());
        val = Val.chkStr(this.getAssertionId());
    }
    fld = new Field(AsnConstants.FIELD_SYS_ASSERTIONID, val, Field.Store.YES, Field.Index.NOT_ANALYZED,
            Field.TermVector.NO);
    document.add(fld);
    // sys.edit.timestamp — only written when an edit timestamp exists;
    // millisecond resolution keeps the encoded value fully ordered
    if (this.getEditTimestamp() != null) {
        millis = this.getEditTimestamp().getTime();
        val = DateTools.timeToString(millis, DateTools.Resolution.MILLISECOND);
        fld = new Field(AsnConstants.FIELD_SYS_EDIT_TIMESTAMP, val, Field.Store.YES, Field.Index.NOT_ANALYZED,
                Field.TermVector.NO);
        document.add(fld);
    }
    // sys.enabled — only the disabled state is written; absence implies enabled
    if (!this.getEnabled()) {
        fld = new Field(AsnConstants.FIELD_SYS_ENABLED, "false", Field.Store.YES, Field.Index.NOT_ANALYZED,
                Field.TermVector.NO);
        document.add(fld);
    }
    // sys.resourceid — optional; written only when non-empty
    val = Val.chkStr(this.getResourceId());
    if (val.length() > 0) {
        fld = new Field(AsnConstants.FIELD_SYS_RESOURCEID, val, Field.Store.YES, Field.Index.NOT_ANALYZED,
                Field.TermVector.NO);
        document.add(fld);
    }
    // sys.timestamp — defaulted to the current time when missing, then always written
    if (this.getTimestamp() == null) {
        this.setTimestamp(new Timestamp(System.currentTimeMillis()));
    }
    millis = this.getTimestamp().getTime();
    val = DateTools.timeToString(millis, DateTools.Resolution.MILLISECOND);
    fld = new Field(AsnConstants.FIELD_SYS_TIMESTAMP, val, Field.Store.YES, Field.Index.NOT_ANALYZED,
            Field.TermVector.NO);
    document.add(fld);
}
From source file:com.gitblit.LuceneExecutor.java
License:Apache License
/**
 * Completely indexes the repository, destroying any existing index first:
 * walks every configured branch, indexes changed blobs and all reachable
 * commits, then indexes gb-issues (if present) and commits the writer.
 *
 * @param model the repository model (name, indexed-branch configuration)
 * @param repository the open JGit repository to index
 * @return the IndexResult tallying blobs, commits, branches, and issues
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    // a failed delete leaves the old index in place; bail out without indexing
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags, keyed by the tagged object's name
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            // NOTE(review): the containsKey guard tests tag.getObjectId() but the
            // map is keyed by tag.getReferencedObjectId().getName() — looks like a
            // latent bug (the guard never matches the actual key); confirm upstream.
            if (!tags.containsKey(tag.getObjectId())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }
        ObjectReader reader = repository.newObjectReader();
        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });
        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);
        // walk through each branch
        for (RefModel branch : branches) {
            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (IssueUtils.GB_ISSUES.equals(branch)) {
                // skip the GB_ISSUES branch because it is indexed later
                // note: this is different than updateIndex
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }
            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }
            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();
            // record branch alias and tip so incremental updates can resume
            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);
            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);
            // collect every blob path in the tip tree; each path is removed
            // from this map once the commit that introduced it is found
            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];
            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);
            RevCommit commit;
            // walk history newest-first, diffing each commit against its parent,
            // until every tip-tree path has been attributed to a commit
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    // root commit: diff against the empty tree
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }
                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;
                    // index the blob metadata; git commit time is in seconds,
                    // hence the * 1000L conversion to milliseconds
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
                            Resolution.MINUTE);
                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                            Index.NOT_ANALYZED_NO_NORMS));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }
                    // index the blob content unless its extension is excluded
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        // decode bytes using the configured candidate encodings
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        os.reset();
                    }
                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }
            os.close();
            // index the tip commit object (indexedCommits de-dupes across branches)
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }
            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }
        // finished
        reader.release();
        // this repository has a gb-issues branch, index all issues
        if (IssueUtils.getIssuesBranch(repository) != null) {
            List<IssueModel> issues = IssueUtils.getIssues(repository, null);
            if (issues.size() > 0) {
                result.branchCount += 1;
            }
            for (IssueModel issue : issues) {
                result.issueCount++;
                Document doc = createDocument(issue);
                writer.addDocument(doc);
            }
        }
        // commit all changes and reset the searcher
        config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        // best-effort: log and return the partial result rather than propagate
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}