Example usage for org.apache.lucene.document DateTools timeToString

List of usage examples for org.apache.lucene.document DateTools timeToString

Introduction

On this page you can find example usage of org.apache.lucene.document DateTools timeToString.

Prototype

public static String timeToString(long time, Resolution resolution) 

Source Link

Document

Converts a millisecond time to a string suitable for indexing.

Usage

From source file:aos.lucene.tools.ChainedFilterTest.java

License:Apache License

/**
 * Builds an in-memory index of MAX documents -- sequential keys, the first
 * half owned by "bob" and the second half by "sue", dated one day apart
 * starting 2009-02-01 -- then prepares the query and filters shared by the
 * test cases.
 */
@Override
public void setUp() throws Exception {

    directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(Version.LUCENE_46),
            IndexWriter.MaxFieldLength.UNLIMITED);

    Calendar calendar = Calendar.getInstance();
    // Calendar months are zero-based: (2009, 1, 1) is February 1, 2009.
    calendar.set(2009, 1, 1, 0, 0);

    for (int docNum = 0; docNum < MAX; docNum++) {
        String owner = (docNum < MAX / 2) ? "bob" : "sue";

        Document document = new Document();
        document.add(new Field("key", String.valueOf(docNum + 1), Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.add(new Field("owner", owner, Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.add(new Field("date", DateTools.timeToString(calendar.getTimeInMillis(), DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(document);

        // Advance one day per document so the date field is strictly increasing.
        calendar.add(Calendar.DATE, 1);
    }

    writer.close();

    searcher = new IndexSearcher(directory);

    // Match every document: owner is either "bob" or "sue".
    BooleanQuery ownerQuery = new BooleanQuery();
    ownerQuery.add(new TermQuery(new Term("owner", "bob")), BooleanClause.Occur.SHOULD);
    ownerQuery.add(new TermQuery(new Term("owner", "sue")), BooleanClause.Occur.SHOULD);
    query = ownerQuery;

    // A date filter that accepts everything strictly before 2099-02-01.
    calendar.set(2099, 1, 1, 0, 0);
    dateFilter = TermRangeFilter.Less("date",
            DateTools.timeToString(calendar.getTimeInMillis(), DateTools.Resolution.DAY));

    bobFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("owner", "bob"))));

    sueFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("owner", "sue"))));
}

From source file:aplicacion.sistema.indexer.test.FileDocument.java

License:Apache License

/** Makes a document for a File.
  <p>/*from  w  w w . j  a v a  2  s  . c o  m*/
  The document has three fields:
  <ul>
  <li><code>path</code>--containing the pathname of the file, as a stored,
  untokenized field;
  <li><code>modified</code>--containing the last modified date of the file as
  a field as created by <a
  href="lucene.document.DateTools.html">DateTools</a>; and
  <li><code>contents</code>--containing the full contents of the file, as a
  Reader field;
  */
public static Document Document(File f) throws java.io.FileNotFoundException {

    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path".  Use a field that is 
    // indexed (i.e. searchable), but don't tokenize the field into words.
    doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the last modified date of the file a field named "modified".  Use 
    // a field that is indexed (i.e. searchable), but don't tokenize the field
    // into words.
    doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
            Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the contents of the file to a field named "contents".  Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in the system's default encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(new Field("contents", new FileReader(f)));

    // return the document
    return doc;
}

From source file:busqueda.HTMLDocument.java

License:Apache License

/**
 * Builds a unique identifier for a file by joining its path and its last
 * modified time (second resolution) with NUL (\u0000) separators, so that
 * lexicographic sorting of uids matches a walk of the file hierarchy.
 */
public static String uid(File f) {
    String nulSeparatedPath = f.getPath().replace(dirSep, '\u0000');
    String modifiedStamp = DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND);
    return nulSeparatedPath + "\u0000" + modifiedStamp;
}

From source file:busqueda.HTMLDocument.java

License:Apache License

/**
 * Builds a Lucene document for an HTML file with path, modified, uid,
 * contents, summary, and title fields.
 */
public static Document Document(File f) throws IOException, InterruptedException {
    Document document = new Document();

    // "path": URL-style path (directory separators normalized to '/'),
    // stored and indexed as a single untokenized term.
    document.add(new Field("path", f.getPath().replace(dirSep, '/'), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // "modified": last-modified time at minute resolution, untokenized.
    document.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
            Field.Store.YES, Field.Index.NOT_ANALYZED));

    // "uid": indexed but not stored, so the index can be maintained
    // incrementally.
    document.add(new Field("uid", uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));

    HTMLParser htmlParser = new HTMLParser(new FileInputStream(f));

    // Tag-stripped contents as a Reader-valued field: tokenized and indexed.
    document.add(new Field("contents", htmlParser.getReader()));

    // Summary is stored for display with hits but not indexed.
    document.add(new Field("summary", htmlParser.getSummary(), Field.Store.YES, Field.Index.NO));

    // Title is both searchable and stored.
    document.add(new Field("title", htmlParser.getTitle(), Field.Store.YES, Field.Index.ANALYZED));

    return document;
}

From source file:com.edu.lucene.FileDocument.java

License:Apache License

/** Makes a document for a File.
<p>/*  w ww .  jav a  2s . c o m*/
The document has three fields:
<ul>
<li><code>path</code>--containing the pathname of the file, as a stored,
untokenized field;
<li><code>modified</code>--containing the last modified date of the file as
a field as created by <a
href="lucene.document.DateTools.html">DateTools</a>; and
<li><code>contents</code>--containing the full contents of the file, as a
Reader field;
*/
public static Document Document(File f) throws java.io.FileNotFoundException {

    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path". Use a field that is
    // indexed (i.e. searchable), but don't tokenize the field into words.
    doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the last modified date of the file a field named "modified". Use
    // a field that is indexed (i.e. searchable), but don't tokenize the field
    // into words.
    doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
            Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the contents of the file to a field named "contents". Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in the system's default encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(new Field("contents", new FileReader(f)));

    // return the document
    return doc;
}

From source file:com.emental.mindraider.core.search.FileDocument.java

License:Apache License

/**
 * Makes a document for a File./*from www .  j  a va 2s  .  com*/
 * <p>
 * The document has three fields:
 * <ul>
 * <li><code>path</code> containing the pathname of the file, as a
 * stored, tokenized field;
 * <li><code>modified</code> containing the last modified date of the
 * file as a keyword field as encoded by <a
 * href="lucene.document.DateField.html">DateField</a>; and
 * <li><code>contents</code> containing the full contents of the file, as
 * a Reader field;
 * </ul>
 */
public static Document Document(File f, String notebookLabel, String conceptLabel, String conceptUri)
        throws java.io.FileNotFoundException {

    // make a new, empty lucene document
    Document doc = new Document();

    // no assemble the document from fields - some of them will be searchable,
    // others will be available in the result (as document attributes) i.e. stored in the index
    Field field;

    // concept URI as attribute - used to delete the document
    field = new Field("uri", conceptUri, Field.Store.YES, Field.Index.UN_TOKENIZED);
    doc.add(field);
    // path as attribute
    field = new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO);
    doc.add(field);
    // SEARCHABLE concept label 
    field = new Field("conceptLabel", conceptLabel, Field.Store.YES, Field.Index.TOKENIZED);
    doc.add(field);
    // notebook label attribute 
    field = new Field("outlineLabel", notebookLabel, Field.Store.YES, Field.Index.NO);
    doc.add(field);
    // timestamp as attribute
    field = new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND),
            Field.Store.YES, Field.Index.NO);
    doc.add(field);

    // concept annotation - the most important
    FileInputStream is = new FileInputStream(f);
    Reader reader = new BufferedReader(new InputStreamReader(is));
    field = new Field("contents", reader);
    doc.add(field);

    // return the document
    return doc;
}

From source file:com.esri.gpt.catalog.lucene.QueryProvider.java

License:Apache License

/**
 * Checks whether the provided text is a full date as stored in the index.
 * A full date is a date string of millisecond resolution.
 *
 * @param queryText possibly a full date
 * @return <code>true</code> if this is a full date
 */
private boolean isFullDate(String queryText) {
    queryText = Val.chkStr(queryText);
    // Reject non-numeric input up front instead of relying on a
    // ParseException for control flow (the previous version parsed first
    // and only then applied this regex).
    if (!queryText.matches("[0-9]+")) {
        return false;
    }
    try {
        long lngDate = DateTools.stringToTime(queryText);
        // The millisecond-resolution encoding is the longest form DateTools
        // produces; anything at least that long is a full date.
        return queryText.length() >= DateTools.timeToString(lngDate, DateTools.Resolution.MILLISECOND).length();
    } catch (java.text.ParseException ex) {
        // Numeric but not a parseable DateTools value (e.g. wrong length).
        return false;
    }
}

From source file:com.esri.gpt.catalog.lucene.TimestampField.java

License:Apache License

/**
 * Converts a timestamp to a string that can be indexed for search.
 *
 * @param value the timestamp to convert (may be null)
 * @return the indexable millisecond-resolution string, or null when the
 *         input is null
 */
protected static String timestampToIndexableString(Timestamp value) {
    return (value == null) ? null
            : DateTools.timeToString(value.getTime(), DateTools.Resolution.MILLISECOND);
}

From source file:com.esri.gpt.server.assertion.index.AsnSystemPart.java

License:Apache License

/**
 * Appends this system part's fields to a Lucene document for indexing.
 * Generates an assertion id and a timestamp when they are missing, and only
 * writes the optional fields (edit timestamp, enabled flag, resource id)
 * when they carry information.
 *
 * @param document the document to append fields to
 */
public void appendWritableFields(Document document) {

    // sys.assertionid -- generate a random UUID if one was never assigned.
    String assertionId = Val.chkStr(this.getAssertionId());
    if (assertionId.length() == 0) {
        this.setAssertionId(UUID.randomUUID().toString());
        assertionId = Val.chkStr(this.getAssertionId());
    }
    document.add(new Field(AsnConstants.FIELD_SYS_ASSERTIONID, assertionId, Field.Store.YES,
            Field.Index.NOT_ANALYZED, Field.TermVector.NO));

    // sys.edit.timestamp -- only written when an edit timestamp exists.
    if (this.getEditTimestamp() != null) {
        String editTime = DateTools.timeToString(this.getEditTimestamp().getTime(),
                DateTools.Resolution.MILLISECOND);
        document.add(new Field(AsnConstants.FIELD_SYS_EDIT_TIMESTAMP, editTime, Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO));
    }

    // sys.enabled -- only persisted when the assertion is disabled.
    if (!this.getEnabled()) {
        document.add(new Field(AsnConstants.FIELD_SYS_ENABLED, "false", Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO));
    }

    // sys.resourceid -- optional.
    String resourceId = Val.chkStr(this.getResourceId());
    if (resourceId.length() > 0) {
        document.add(new Field(AsnConstants.FIELD_SYS_RESOURCEID, resourceId, Field.Store.YES,
                Field.Index.NOT_ANALYZED, Field.TermVector.NO));
    }

    // sys.timestamp -- default to "now" when unset.
    if (this.getTimestamp() == null) {
        this.setTimestamp(new Timestamp(System.currentTimeMillis()));
    }
    String timestamp = DateTools.timeToString(this.getTimestamp().getTime(), DateTools.Resolution.MILLISECOND);
    document.add(new Field(AsnConstants.FIELD_SYS_TIMESTAMP, timestamp, Field.Store.YES,
            Field.Index.NOT_ANALYZED, Field.TermVector.NO));

}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * This completely indexes the repository and will destroy any existing
 * index./*  www  . ja v a 2s .c  om*/
 * 
 * @param repositoryName
 * @param repository
 * @return IndexResult
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getObjectId())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }

        ObjectReader reader = repository.newObjectReader();

        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });

        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);

        // walk through each branch
        for (RefModel branch : branches) {

            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (IssueUtils.GB_ISSUES.equals(branch)) {
                // skip the GB_ISSUES branch because it is indexed later
                // note: this is different than updateIndex
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }

            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }

            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();

            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);

            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);

            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];

            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);

            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }

                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;

                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                            Index.NOT_ANALYZED_NO_NORMS));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));

                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }

                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        os.reset();
                    }

                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }

            os.close();

            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }

            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }

        // finished
        reader.release();

        // this repository has a gb-issues branch, index all issues
        if (IssueUtils.getIssuesBranch(repository) != null) {
            List<IssueModel> issues = IssueUtils.getIssues(repository, null);
            if (issues.size() > 0) {
                result.branchCount += 1;
            }
            for (IssueModel issue : issues) {
                result.issueCount++;
                Document doc = createDocument(issue);
                writer.addDocument(doc);
            }
        }

        // commit all changes and reset the searcher
        config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}