From adb409da9df04e173e87c25429c9cec652a81ba5 Mon Sep 17 00:00:00 2001
From: Tzafrir Cohen
Date: Sat, 25 Nov 2017 17:41:16 +0200
Subject: parallel (multithreaded) parsing of main pages

Parsing each item requires fetching a URL. This should be done in
parallel. Moving this task to worker threads. Each thread writes the
results to its own index in the result array.

A worker that fails traces the error and leaves its index empty. Such
items are skipped when building the page, and an incomplete result is
not cached.
---
 default.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 47 insertions(+), 11 deletions(-)

diff --git a/default.py b/default.py
index c83c652..73aab17 100644
--- a/default.py
+++ b/default.py
@@ -14,6 +14,7 @@ import urllib
 #import urllib2
 import requests
 import simplecache
+import threading
 import urlparse
 import xbmc
 import xbmcgui
@@ -106,6 +107,34 @@ def title_checksum(title):
     return zlib.adler32(title.encode('utf-8'))
 
 
+def get_program_item(res_array, page, a, i):
+    """ A thread worker to get information about a program page
+
+    input:
+      res_array: shared results list; only index i is written.
+      page: object whose build_url() produces the item's URL.
+      a: the a element from the program's page.
+      i: the index in res_array reserved for this worker.
+
+    Writes a (title, url) tuple to the specified index in the results
+    array. On failure the error is traced and the index is left
+    untouched, so the caller can detect and skip the item.
+    """
+    try:
+        path = a.get('href')
+        show_id = re.sub('.*=', '', path)
+        title = get_show_title(KAN_URL, path)
+        checksum = title_checksum(title)
+        url = page.build_url({'mode': 'show', 'id': show_id,
+                              'checksum': str(checksum)})
+    except Exception as e:
+        # An exception here would otherwise only end this thread; the
+        # caller never sees it. Log it and leave the slot empty.
+        trace("failed to get program item %d: %r" % (i, e))
+        return
+    res_array[i] = (title, url)
+
+
 def video_top_menu(page, name):
     """ Display a menu of all the TV shows """
     trace("Show top menu for " + name)
@@ -122,17 +151,24 @@ def video_top_menu(page, name):
     anchors = parsed.find_all('a',
                               class_="program_category_link w-inline-block")
     trace("got anchors: " + str(len(anchors)))
-    items = []
-    for a in anchors:
-        path = a.get('href')
-        show_id = re.sub('.*=', '', path)
-        title = get_show_title(KAN_URL, path)
-        checksum = title_checksum(title)
-        url = page.build_url({'mode': 'show', 'id': show_id,
-                              'checksum': str(checksum)})
-        items.append((title, url))
-    page.build_page(items, isFolder=True)
-    SiteCache.set(cache_id, items, expiration=datetime.timedelta(days=1))
+    results = [None] * len(anchors)
+    threads = []
+    for i, a in enumerate(anchors):
+        t = threading.Thread(target=get_program_item,
+                             args=(results, page, a, i))
+        t.start()
+        threads.append(t)
+    for t in threads:
+        t.join()
+
+    # A worker that failed leaves None at its index; skip those items.
+    page_items = [item for item in results if item is not None]
+    page.build_page(page_items, isFolder=True)
+    if len(page_items) == len(anchors):
+        # Cache only a complete list, so that a transient fetch failure
+        # is not kept for a whole day.
+        SiteCache.set(cache_id, page_items,
+                      expiration=datetime.timedelta(days=1))
 
 
 def show_menu(page):
-- 
cgit v1.2.3