Skip to main content

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [List Home]
Re: [jgit-dev] Performance of indexing blob metadata

Hi Kevin,
 
That is a great tip.  Unfortunately, it made no difference: the runtime results are the same.  Attached below is my modified unit test class, which now uses the shared ObjectReader.  I also modified the fullIndex check to isolate the expensive line, which is the call to next().
 
-J
 
 
On Sat, Mar 10, 2012, at 02:28 PM, Kevin Sawicki wrote:
Hi,
 
Have you tried sharing an ObjectReader between all RevWalk and TreeWalk objects created?

Obtain an ObjectReader by calling Repository.newObjectReader() and then pass the reader to all RevWalk and TreeWalk objects created instead of passing the Repository handle.
 
Don't call dispose/release on the RevWalk or release on the TreeWalk, just call release on the ObjectReader once you have done all the processing for a given repository.
 
Sincerely,
Kevin Sawicki
 
 
package com.gitblit.tests;

import java.text.MessageFormat;
import java.util.Collections;
import java.util.Map;

import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.PathFilterGroup;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.junit.Test;

import com.gitblit.utils.StringUtils;

public class NastyTest {

    @Test
    public void quickTraversalTest() throws Exception {
        Repository repo = GitBlitSuite.getJGitRepository();
        System.out.println("----------------------------");
        System.out.println("Quick Traversal Test");
        System.out.println("----------------------------");
        traverse(repo, false);
        repo.close();
    }

    @Test
    public void fullTraversalTest() throws Exception {
        Repository repo = GitBlitSuite.getJGitRepository();
        System.out.println("----------------------------");
        System.out.println("Full Traversal Test");
        System.out.println("----------------------------");
        traverse(repo, true);
        repo.close();
    }

    private void traverse(Repository repo, boolean fullIndex) throws Exception {
        ObjectReader reader = repo.newObjectReader();
        Map<String, Ref> locals = repo.getRefDatabase().getRefs(Constants.R_HEADS);
        for (Map.Entry<String, Ref> entry : locals.entrySet()) {
            long start = System.currentTimeMillis();
            System.out.print("  traversing " + entry.getKey() + "...");
            int blobCount = 0;
            int commitCount = 0;

            Ref ref = entry.getValue();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit head = revWalk.parseCommit(ref.getObjectId());

            TreeWalk treeWalk = new TreeWalk(reader);
            treeWalk.addTree(head.getTree());
            treeWalk.setRecursive(true);

            while (treeWalk.next()) {
                blobCount++;
                String blobPath = treeWalk.getPathString();
                RevCommit blobRev = head;
                                
                // determine the most recent commit for this blob
                RevWalk blobWalk = new RevWalk(reader);
                blobWalk.markStart(blobWalk.parseCommit(head.getId()));
                TreeFilter filter = AndTreeFilter.create(
                        PathFilterGroup.createFromStrings(Collections.singleton(blobPath)),
                        TreeFilter.ANY_DIFF);
                blobWalk.setTreeFilter(filter);
                
                if (fullIndex) {                    
                    // XXX this next() is the really expensive operation
                    blobRev = blobWalk.next();
                }

                String blobAuthor = getAuthor(blobRev);
                String blobCommitter = getCommitter(blobRev);
                String blobDate = DateTools.timeToString(blobRev.getCommitTime() * 1000L,
                        Resolution.MINUTE);

                // index blob here
            }

            revWalk.reset();
            revWalk.markStart(head);
            RevCommit rev;
            while ((rev = revWalk.next()) != null) {
                // index commit here
                commitCount++;
            }
            float secs = (System.currentTimeMillis() - start)/1000f;
            System.out.println(MessageFormat.format(
                    "  found {0} blobs and {1} commits in {2} secs", blobCount, commitCount, secs));
        }
        
        // finish
        reader.release();
    }

    private String getAuthor(RevCommit commit) {
        String name = "unknown";
        try {
            name = commit.getAuthorIdent().getName();
            if (StringUtils.isEmpty(name)) {
                name = commit.getAuthorIdent().getEmailAddress();
            }
        } catch (NullPointerException n) {
        }
        return name;
    }

    private String getCommitter(RevCommit commit) {
        String name = "unknown";
        try {
            name = commit.getCommitterIdent().getName();
            if (StringUtils.isEmpty(name)) {
                name = commit.getCommitterIdent().getEmailAddress();
            }
        } catch (NullPointerException n) {
        }
        return name;
    }
}
 

Back to the top