diff options
author | Tzafrir Cohen <tzafrir@cohens.org.il> | 2017-11-25 17:41:16 +0200 |
---|---|---|
committer | Tzafrir Cohen <tzafrir@cohens.org.il> | 2017-11-25 17:41:16 +0200 |
commit | adb409da9df04e173e87c25429c9cec652a81ba5 (patch) | |
tree | 6de684a1a84b4799fb95dd8d977a7c9fea021f9b | |
parent | 95e9a704a9feef853dfb31eaa1482e33e8844ca9 (diff) |
parallel (multithreaded) parsing of main pages
Parsing each item requires fetching a URL, so the items should be
processed in parallel. This commit moves that task to worker threads.
Each thread writes the results to its own index in the result array.
-rw-r--r-- | default.py | 41 |
1 file changed, 30 insertions, 11 deletions
```diff
@@ -14,6 +14,7 @@ import urllib
 #import urllib2
 import requests
 import simplecache
+import threading
 import urlparse
 import xbmc
 import xbmcgui
@@ -106,6 +107,22 @@ def title_checksum(title):
     return zlib.adler32(title.encode('utf-8'))
 
 
+def get_program_item(res_array, page, a, i):
+    """ A thread worker to get information about a program page
+
+    input: a: the a element from the program's page.
+
+    Writes results to the specified index in the results array.
+    """
+    path = a.get('href')
+    show_id = re.sub('.*=', '', path)
+    title = get_show_title(KAN_URL, path)
+    checksum = title_checksum(title)
+    url = page.build_url({'mode': 'show', 'id': show_id,
+                          'checksum': str(checksum)})
+    res_array[i] = (title, url)
+
+
 def video_top_menu(page, name):
     """ Display a menu of all the TV shows """
     trace("Show top menu for " + name)
@@ -122,17 +139,19 @@ def video_top_menu(page, name):
     anchors = parsed.find_all('a',
                               class_="program_category_link w-inline-block")
     trace("got anchors: " + str(len(anchors)))
-    items = []
-    for a in anchors:
-        path = a.get('href')
-        show_id = re.sub('.*=', '', path)
-        title = get_show_title(KAN_URL, path)
-        checksum = title_checksum(title)
-        url = page.build_url({'mode': 'show', 'id': show_id,
-                              'checksum': str(checksum)})
-        items.append((title, url))
-    page.build_page(items, isFolder=True)
-    SiteCache.set(cache_id, items, expiration=datetime.timedelta(days=1))
+    page_items = [None for item in anchors]
+    threads = []
+    for i in range(0, len(anchors)):
+        t = threading.Thread(target=get_program_item,
+                             args=(page_items, page, anchors[i], i))
+        t.start()
+        threads.append(t)
+    for t in threads:
+        t.join()
+    # FIXME: check if result is still None. If so: handle error?
+
+    page.build_page(page_items, isFolder=True)
+    SiteCache.set(cache_id, page_items, expiration=datetime.timedelta(days=1))
 
 
 def show_menu(page):
```