Reduce parallelism to avoid overwhelming server with 404s

This commit is contained in:
Fam Zheng 2025-12-27 17:26:44 +00:00
parent 73529bec16
commit 069205946a

View File

@@ -65,7 +65,7 @@ class ScanDataFetcher(object):
if sample_rate: if sample_rate:
fetch_backlog = random.sample(fetch_backlog, int(len(fetch_backlog) * sample_rate)) fetch_backlog = random.sample(fetch_backlog, int(len(fetch_backlog) * sample_rate))
logger.info(f'fetch_backlog: {len(fetch_backlog)}') logger.info(f'fetch_backlog: {len(fetch_backlog)}')
pool = mp.Pool(mp.cpu_count() * 4) pool = mp.Pool(min(8, mp.cpu_count()))
counts = defaultdict(int) counts = defaultdict(int)
for r in tqdm(pool.imap_unordered(self.fetch_one_scan, fetch_backlog), total=len(fetch_backlog)): for r in tqdm(pool.imap_unordered(self.fetch_one_scan, fetch_backlog), total=len(fetch_backlog)):
counts[r] += 1 counts[r] += 1