diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-09-30 09:11:21 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-09-30 09:11:21 -0400 |
commit | 2fcdd2df10bb6b8bff695fa5cc1b87d108590847 (patch) | |
tree | 69f15bb6c1ad3a1c2b1236d304411aa4d7ae298f | |
parent | 8a72c8865619b68a8c200e43c9f1e476cf846b58 (diff) | |
download | grokmirror-2fcdd2df10bb6b8bff695fa5cc1b87d108590847.tar.gz |
Use faster dir searching algorithm
Switch back to the os.walk algorithm because it gives us more control
over the dirs we find and lets us avoid needlessly recursing into the git dirs.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | grokmirror/__init__.py | 65 | ||||
-rwxr-xr-x | grokmirror/fsck.py | 2 |
2 files changed, 36 insertions, 31 deletions
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py index 0a25b84..999e9c0 100644 --- a/grokmirror/__init__.py +++ b/grokmirror/__init__.py @@ -738,7 +738,7 @@ def is_obstrepo(fullpath, obstdir): return fullpath.find(obstdir) == 0 -def find_all_gitdirs(toplevel, ignore=None, normalize=False, exclude_objstore=True, flat=False): +def find_all_gitdirs(toplevel, ignore=None, normalize=False, exclude_objstore=True): global _alt_repo_map if _alt_repo_map is None: _alt_repo_map = dict() @@ -752,39 +752,44 @@ def find_all_gitdirs(toplevel, ignore=None, normalize=False, exclude_objstore=Tr logger.info(' search: finding all repos in %s', toplevel) logger.debug('Ignore list: %s', ' '.join(ignore)) gitdirs = set() - tp = pathlib.Path(toplevel) - if flat: - globpatt = '*.git' - else: - globpatt = '**/*.git' - for subp in tp.glob(globpatt): - # Should we ignore this dir? - ignored = False - for ignoreglob in ignore: - if subp.match(ignoreglob): - ignored = True - break - if ignored: - continue - fullpath = subp.resolve().as_posix() - if not is_bare_git_repo(fullpath): + for root, dirs, files in os.walk(toplevel, topdown=True): + if not len(dirs): continue - if exclude_objstore and os.path.exists(os.path.join(fullpath, 'grokmirror.objstore')): - continue - if normalize: - fullpath = os.path.realpath(fullpath) - logger.debug('Found %s', fullpath) - if fullpath not in gitdirs: + torm = set() + for name in dirs: + fullpath = os.path.join(root, name) + # Should we ignore this dir? 
+ ignored = False + for ignoredir in ignore: + if fnmatch.fnmatch(fullpath, ignoredir): + torm.add(name) + ignored = True + break + if ignored: + continue + if not is_bare_git_repo(fullpath): + continue + if exclude_objstore and os.path.exists(os.path.join(fullpath, 'grokmirror.objstore')): + continue + if normalize: + fullpath = os.path.realpath(fullpath) + + logger.debug('Found %s', os.path.join(root, name)) gitdirs.add(fullpath) + torm.add(name) - if build_amap: - altrepo = get_altrepo(fullpath) - if not altrepo: - continue - if altrepo not in _alt_repo_map: - _alt_repo_map[altrepo] = set() - _alt_repo_map[altrepo].add(fullpath) + if build_amap: + altrepo = get_altrepo(fullpath) + if not altrepo: + continue + if altrepo not in _alt_repo_map: + _alt_repo_map[altrepo] = set() + _alt_repo_map[altrepo].add(fullpath) + + for name in torm: + # don't recurse into the found *.git dirs + dirs.remove(name) return gitdirs diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py index 411e8e7..c4703cf 100755 --- a/grokmirror/fsck.py +++ b/grokmirror/fsck.py @@ -983,7 +983,7 @@ def fsck_mirror(config, force=False, repack_only=False, conn_only=False, grokmirror.manifest_lock(manifile) manifest = grokmirror.read_manifest(manifile) - obstrepos = grokmirror.find_all_gitdirs(obstdir, normalize=True, exclude_objstore=False, flat=True) + obstrepos = grokmirror.find_all_gitdirs(obstdir, normalize=True, exclude_objstore=False) analyzed = 0 queued = 0 |