Multiprocessing Nested Loop
Solution 1:
def funcs(start, end):
    """Collect the image ids for the models at indices ``start..end-1``.

    Returns a flat list with one entry per (tag, model) pair, produced by
    ``get_images_id``.

    NOTE(review): relies on ``all_model_id``, ``get_tags`` and
    ``get_images_id`` being visible as globals in the worker process. In
    Solution 1 ``all_model_id`` is created inside ``main()``, so it must be
    made module-level (or passed in) for this to work — TODO confirm.
    """
    list_all_img_id = []
    for i in range(start, end):
        model_key = all_model_id[i][0]
        # Iterate the tags directly instead of indexing by position;
        # the original's pre-loop ``tag = []`` was dead and is dropped.
        for tag in get_tags(model_key):
            list_all_img_id.append(get_images_id(tag[0], model_key))
    return list_all_img_id
from multiprocessing.pool import Pool
def main():
    """Split the model-id list into three ranges and process them with a pool.

    Submits three ``funcs`` tasks covering the whole list and prints each
    task's result in submission order.
    """
    all_model_id = get_models_id()
    len_all_model_id = len(all_model_id)
    div_total = len_all_model_id // 3  # floor division, same as int(x / 3) for non-negatives
    with Pool(3) as pool:
        # Submit 3 tasks without blocking. The last slice simply runs to the
        # end of the list, absorbing the remainder of the division by 3.
        # BUGFIX: the original end index ``div_total*3 + rest_div_total + 1``
        # overshoots the list by one (len == 3*div_total + rest) and would
        # raise IndexError inside the worker.
        results = [
            pool.apply_async(funcs, args=(0, div_total)),
            pool.apply_async(funcs, args=(div_total, div_total * 2)),
            pool.apply_async(funcs, args=(div_total * 2, len_all_model_id)),
        ]
        # Now await the 3 results; .get() blocks, so output is in
        # task-submission order.
        for result in results:
            print(result.get())


if __name__ == "__main__":
    main()
Note that apply_async
takes an optional callback argument where you can specify a function to be called with a result (the actual return value from the task) as soon as it becomes available, which may not be in the order in which the tasks were submitted. The above method of obtaining the results (i.e. relying on the result object returned from apply_async
on which a blocking call to get
can be made) will always obtain the results in task-submission order. The starmap
function is a reasonable alternative if you have all the call arguments for all the task submissions in an iterable such as a list or a tuple:
# starmap submits every task, blocks until all are done, and returns the
# results in task-submission order.
with Pool(3) as pool:
    # BUGFIX: fused ``withPool`` restored to ``with Pool``; the original
    # ``+ 1`` on the last end index overshot the list by one, since
    # len(all_model_id) == div_total*3 + rest_div_total.
    results = pool.starmap(funcs, [
        (0, div_total),
        (div_total, div_total * 2),
        (div_total * 2, div_total * 3 + rest_div_total),
    ])
    for result in results:
        print(result)
I, too, am a fan of the concurrent.futures
module but wanted to make the minimal number of changes to your program. But note that you can use the undocumented but nevertheless existent ThreadPool class that is compatible with the multiprocessing Pool class by simply invoking:
from multiprocessing.pool import ThreadPool
instead of
from multiprocessing.pool import Pool
and then specifying:
with ThreadPool(3) as pool:
If your tasks are very I/O intensive, then threading may be a better option.
Solution 2:
I've modified your code a little bit, but I haven't run it yet. But I think my code will work for your problem. Ask me if it's not working for you.
from multiprocessing import Pool
import multiprocessing as mp
def funcs(tupl):
    """Worker for Pool.map: ``tupl`` is a ``(start, end)`` index pair.

    Collects the image ids for the models at indices ``start..end-1`` and
    returns them as a flat list.

    NOTE(review): depends on ``all_model_id``, ``get_tags`` and
    ``get_images_id`` being importable globals in the worker process — in
    this snippet ``all_model_id`` is local to ``main()``; verify it is made
    module-level.
    """
    # BUGFIX: restored tokens fused by formatting (``deffuncs``, ``inrange``).
    start, end = tupl
    list_all_img_id = []
    for i in range(start, end):
        model_key = all_model_id[i][0]
        # Iterate tags directly; the original's dead ``tag = []`` is dropped.
        for tag in get_tags(model_key):
            list_all_img_id.append(get_images_id(tag[0], model_key))
    return list_all_img_id
def main():
    """Split the model ids into three (start, end) ranges and map them over a pool."""
    all_model_id = get_models_id()
    len_all_model_id = len(all_model_id)
    div_total = len_all_model_id // 3
    rest_div_total = len_all_model_id % 3
    lst_args = [
        (0, div_total),
        (div_total, div_total * 2),
        # BUGFIX: the original ``+ 1`` overshot the last valid index by one.
        (div_total * 2, div_total * 3 + rest_div_total),
    ]
    # BUGFIX: the original called ``pool.map(funcs, list_args)`` — a NameError,
    # since the list is named ``lst_args``. The context manager also closes
    # the pool, which the original leaked.
    with mp.Pool(processes=3) as pool:
        res = pool.map(funcs, lst_args)
    # you can loop through res to get your results


# BUGFIX: the ``__main__`` guard had been swallowed into a trailing comment,
# leaving main() called unconditionally on import.
if __name__ == "__main__":
    main()
Solution 3:
Try this out, using concurrent.futures module.
ThreadPoolExecutor(max_workers = 10)
(you can specify maximum workers).
Moreover if you want multiple Processes instead of Threads. You can simply replace ThreadPoolExecutor with ProcessPoolExecutor
# BUGFIX: the snippet used concurrent.futures without importing it.
import concurrent.futures

all_model_id = get_models_id()
liste_all_img_id = []


def func(model_id):
    """Append the image ids for every tag of ``model_id`` to the shared list.

    ``model_id`` is one element of ``all_model_id``; only ``model_id[0]`` is
    used. Results are accumulated in the module-level ``liste_all_img_id``
    (list.append is safe under CPython's GIL for the thread executor).
    """
    # The original module-level ``tag = []`` was dead and is dropped.
    for tag in get_tags(model_id[0]):
        liste_all_img_id.append(get_images_id(tag[0], model_id[0]))


# ThreadPoolExecutor(max_workers=10) lets you cap the worker count; swap in
# ProcessPoolExecutor for CPU-bound work (note: the shared list would then no
# longer be updated across processes).
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(func, all_model_id)
Post a Comment for "Multiprocessing Nested Loop"