Reduce parallelism to avoid overwhelming server with 404s
This commit is contained in:
parent
73529bec16
commit
069205946a
@@ -65,7 +65,7 @@ class ScanDataFetcher(object):
|
|||||||
if sample_rate:
|
if sample_rate:
|
||||||
fetch_backlog = random.sample(fetch_backlog, int(len(fetch_backlog) * sample_rate))
|
fetch_backlog = random.sample(fetch_backlog, int(len(fetch_backlog) * sample_rate))
|
||||||
logger.info(f'fetch_backlog: {len(fetch_backlog)}')
|
logger.info(f'fetch_backlog: {len(fetch_backlog)}')
|
||||||
pool = mp.Pool(mp.cpu_count() * 4)
|
pool = mp.Pool(min(8, mp.cpu_count()))
|
||||||
counts = defaultdict(int)
|
counts = defaultdict(int)
|
||||||
for r in tqdm(pool.imap_unordered(self.fetch_one_scan, fetch_backlog), total=len(fetch_backlog)):
|
for r in tqdm(pool.imap_unordered(self.fetch_one_scan, fetch_backlog), total=len(fetch_backlog)):
|
||||||
counts[r] += 1
|
counts[r] += 1
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user