summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tzafrir Cohen <tzafrir@cohens.org.il> 2017-11-25 17:41:16 +0200
committer: Tzafrir Cohen <tzafrir@cohens.org.il> 2017-11-25 17:41:16 +0200
commit: adb409da9df04e173e87c25429c9cec652a81ba5 (patch)
tree: 6de684a1a84b4799fb95dd8d977a7c9fea021f9b
parent: 95e9a704a9feef853dfb31eaa1482e33e8844ca9 (diff)
parallel (multithreaded) parsing of main pages
Parsing each item requires fetching a URL, so these fetches should be done in parallel. This commit moves that task to worker threads. Each thread writes its result to its own index in the result array.
-rw-r--r-- default.py 41
1 files changed, 30 insertions, 11 deletions
diff --git a/default.py b/default.py
index c83c652..73aab17 100644
--- a/default.py
+++ b/default.py
@@ -14,6 +14,7 @@ import urllib
#import urllib2
import requests
import simplecache
+import threading
import urlparse
import xbmc
import xbmcgui
@@ -106,6 +107,22 @@ def title_checksum(title):
return zlib.adler32(title.encode('utf-8'))
+def get_program_item(res_array, page, a, i):
+ """ A thread worker to get information about a program page
+
+ input: a: the a element from the program's page.
+
+ Writes results to the specified index in the results array.
+ """
+ path = a.get('href')
+ show_id = re.sub('.*=', '', path)
+ title = get_show_title(KAN_URL, path)
+ checksum = title_checksum(title)
+ url = page.build_url({'mode': 'show', 'id': show_id,
+ 'checksum': str(checksum)})
+ res_array[i] = (title, url)
+
+
def video_top_menu(page, name):
""" Display a menu of all the TV shows """
trace("Show top menu for " + name)
@@ -122,17 +139,19 @@ def video_top_menu(page, name):
anchors = parsed.find_all('a',
class_="program_category_link w-inline-block")
trace("got anchors: " + str(len(anchors)))
- items = []
- for a in anchors:
- path = a.get('href')
- show_id = re.sub('.*=', '', path)
- title = get_show_title(KAN_URL, path)
- checksum = title_checksum(title)
- url = page.build_url({'mode': 'show', 'id': show_id,
- 'checksum': str(checksum)})
- items.append((title, url))
- page.build_page(items, isFolder=True)
- SiteCache.set(cache_id, items, expiration=datetime.timedelta(days=1))
+ page_items = [None for item in anchors]
+ threads = []
+ for i in range(0, len(anchors)):
+ t = threading.Thread(target=get_program_item,
+ args=(page_items, page, anchors[i], i))
+ t.start()
+ threads.append(t)
+ for t in threads:
+ t.join()
+ # FIXME: check if result is still None. If so: handle error?
+
+ page.build_page(page_items, isFolder=True)
+ SiteCache.set(cache_id, page_items, expiration=datetime.timedelta(days=1))
def show_menu(page):