I'm trying to get this function to work asynchronously (I have tried asyncio, ThreadPoolExecutor and ProcessPoolExecutor, and still no luck). It takes around 11 seconds on my PC to complete a batch of 500 items, and there is no difference compared to a plain for loop, so I assume it doesn't work as expected (in parallel).
Here is the function:
import re
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool

from unidecode import unidecode
# Shared pool of 4 workers. ThreadPool is multiprocessing.dummy.Pool, so the
# workers are threads of this process, not separate processes.
pool = ThreadPool(processes=4)
def is_it_bad(word):
    """Look up *word* among the entries of the module-level ``all_names``.

    Returns the first entry whose ``'name'`` value (converted to ``str``)
    contains ``str(word)`` as a substring. When no entry matches, a new dict
    ``{'name': word, 'gender': 2}`` is returned instead.
    """
    needle = str(word)
    match = next(
        (entry for entry in all_names if needle in str(entry['name'])),
        None,
    )
    if match is None:
        return {'name': word, 'gender': 2}
    return match
# Runs of anything that is not a lowercase ASCII letter or a space.
_NON_LETTER_RUN = re.compile(r'[^a-z ]+')


def check_word(arr):
    """Derive a gender code for one follower and store it in ``followers``.

    ``arr`` is an ``(index, entry)`` pair where ``entry`` has ``'fullname'``
    and ``'username'`` keys. The two values are joined, transliterated with
    ``unidecode``, lower-cased and stripped of everything except ``a-z`` and
    spaces. The resulting words, plus the same text with all spaces removed,
    are each wrapped in underscores and looked up with ``is_it_bad``; words
    of two characters or fewer are skipped.

    The codes found are reduced to a single value:

    * only code 2 was found, or both 0 and 1 were found -> 2
    * otherwise, 0 was found                            -> 0
    * anything else                                     -> 1

    The value is written to ``followers[arr[0]]['gender']``; nothing is
    returned. Because the result lives only in that module-level structure,
    it is visible to the caller when run in threads of the same process.

    NOTE(review): when no word is longer than two characters, no lookup
    happens and the result falls through to 1 -- confirm that is intended.
    """
    text = unidecode(str(arr[1]['fullname'] + ' ' + arr[1]['username'])).lower()
    # The original passed this pattern to str.replace(), which treats it as a
    # literal string and therefore never stripped anything. Lower-casing first
    # keeps capital letters from being removed by the [^a-z ] class.
    text = _NON_LETTER_RUN.sub(' ', text)
    # Also try the whole name with the spaces squeezed out.
    text = text + ' ' + text.replace(' ', '')

    found = {
        int(is_it_bad('_' + chunk + '_')['gender'])
        for chunk in text.split(' ')
        if len(chunk) > 2
    }

    if found == {2} or {0, 1} <= found:
        gender = 2
    elif 0 in found:
        gender = 0
    else:
        gender = 1
    followers[arr[0]]['gender'] = gender
# Run check_word over every (index, entry) pair of ``names`` on the pool.
# NOTE(review): ``pool`` is a multiprocessing.dummy pool, i.e. threads. If
# check_word is pure-Python CPU work, the GIL lets only one thread execute
# bytecode at a time, so this is not expected to beat a plain loop -- confirm
# by profiling before tuning the worker count.
results = pool.map(check_word, list(enumerate(names)))
Can you please help me with this?