summary | refs | log | tree | commit | diff
path: root/plugin.video.ecbtv/resources/lib/api.py
diff options
context:
space:
mode:
Diffstat (limited to 'plugin.video.ecbtv/resources/lib/api.py')
-rw-r--r-- plugin.video.ecbtv/resources/lib/api.py | 152
1 files changed, 105 insertions, 47 deletions
diff --git a/plugin.video.ecbtv/resources/lib/api.py b/plugin.video.ecbtv/resources/lib/api.py
index 6f57dcd..c8072c8 100644
--- a/plugin.video.ecbtv/resources/lib/api.py
+++ b/plugin.video.ecbtv/resources/lib/api.py
@@ -28,38 +28,55 @@
Module for extracting video links from the England and Wales Cricket Board website
'''
-import json
import os
+import re
from urlparse import urljoin, urlparse, urlunparse
from urllib import urlencode
from datetime import datetime
import time
from collections import namedtuple
+import math
import requests
from bs4 import BeautifulSoup
-HOST = 'http://www.ecb.co.uk'
-BASE_URL = urljoin(HOST, 'tv/')
+
# Base URL of the England and Wales Cricket Board website.
+BASE_URL = 'http://www.ecb.co.uk/'
# Brightcove host that serves the HLS playlists.
HLS_HOST = 'https://secure.brightcove.com/'
# HLS master playlist URL template; '{}' is filled with a video's mediaId.
HLS_URL_FMT = urljoin(HLS_HOST, 'services/mobile/streaming/index/master.m3u8?videoId={}')
-SEARCH_URL = 'https://content-ecb.pulselive.com/search/ecb/'
# Player photo URL template; filled with the lower-cased player category
# and the player id, in that order.
+PLAYER_THUMB_URL_FMT = 'https://ecb-resources.s3.amazonaws.com/player-photos/{}/480x480/{}.png'
# Endpoint of the JSON search api (query string is added by _search_url).
+SEARCH_URL = 'https://content-ecb.pulselive.com/search/ecb/'
# Endpoint for lists of videos (query string is added by _video_list_url).
+VIDEO_LIST_URL = 'https://content-ecb.pulselive.com/content/ecb/EN/'
# A playable video: title, HLS url, thumbnail url (or None), date and duration.
Video = namedtuple('Video', 'title url thumbnail date duration')
# A team, player or player category: display name, content reference such as
# 'cricket_team:11' (or None), and thumbnail url (or None).
+Entity = namedtuple('Entity', 'name reference thumbnail')
+
+
def _video_list_url(reference, page, page_size=10):
    '''Builds the URL for one page of the video list.

    `reference` is the content reference to filter by (for example
    'cricket_team:11'); None is sent as an empty string.  `page` is
    decremented by one before it is sent as the `page` parameter and
    `page_size` is sent as `pageSize`.
    '''
    if reference is None:
        reference = ''
    query = urlencode(dict(
        contentTypes='video',
        references=reference,
        page=page - 1,
        pageSize=page_size
    ))
    # Swap only the query component of the parsed endpoint URL.
    return urlunparse(urlparse(VIDEO_LIST_URL)._replace(query=query))
-def _search_url(term, start, size):
def _search_url(term, page, page_size=10):
    '''Builds the URL for one page of video results from the JSON search api.

    `page` is turned into the `start` offset as (page - 1) * page_size, so
    page 1 starts at offset 0; `page_size` is sent as `size`.
    '''
    scheme, netloc, path, params, _query, fragment = urlparse(SEARCH_URL)
    offset = (page - 1) * page_size
    query = urlencode(dict(
        type='VIDEO',
        fullObjectResponse=True,
        terms=term,
        size=page_size,
        start=offset
    ))
    return urlunparse((scheme, netloc, path, params, query, fragment))
@@ -78,12 +95,6 @@ def _date_from_str(date_str, fmt='%d %B %Y'):
return datetime(*(time.strptime(date_str, fmt)[0:6])).date()
-def _date(media_item):
- '''Returns a date object from the HTML media item.'''
- date_str = media_item.find('time', 'media__sub-meta').string
- return _date_from_str(date_str)
-
-
def _date_json(json_item):
'''Returns a date object from the JSON item.
The date can be one of two formats'''
@@ -98,60 +109,107 @@ def _date_json(json_item):
raise exc
-def categories():
- '''Generator for category names and links, excluding all that appear before Home'''
- start = False
- for submenu_link in _soup()('a', 'submenu__link'):
- title = submenu_link.string.strip()
- if start and title != 'All Categories':
- yield title, os.path.basename(submenu_link['href'])
- if title == 'Home':
- start = True
+def _thumbnail_variant(video):
+ if video['thumbnail'] is None:
+ return
+ return (variant['url'] for variant in video['thumbnail']['variants']
+ if variant['tag']['id'] == 981).next()
-def videos(path):
- '''Generator for all videos from a particular page'''
- for media_item in _soup(path)('a', 'media__item'):
- video = json.loads(media_item['data-ui-args'])
- yield Video(
- title=media_item.find('span', 'media__title').string,
- url=HLS_URL_FMT.format(video['mediaId']),
- thumbnail=media_item.picture.img['data-highres-img'],
- date=_date(media_item),
- duration=int(video['duration'].replace(',', ''))
def england():
    '''Returns the Entity for the England team.

    The reference is fixed to 'cricket_team:11' and there is no thumbnail.
    '''
    # Positional order follows the Entity fields: name, reference, thumbnail.
    return Entity('England', 'cricket_team:11', None)
+
+
def counties():
    '''Generator for the teams on the county championship teams page.

    Yields one Entity per 'partners__item' div: the link text is the name,
    the last path component of the link (converted to an integer) forms the
    'cricket_team:<id>' reference and the image source is the thumbnail.
    '''
    page = _soup('/county-championship/teams')
    for item in page.find_all('div', 'partners__item'):
        link = item.a
        team_id = int(os.path.basename(link['href']))
        yield Entity(
            name=link.text,
            reference='cricket_team:{}'.format(team_id),
            thumbnail=item.img['src']
        )
+
+
def player_categories():
    '''Generator for the player category tabs on the England men's players page.

    Yields an Entity named after the 'data-ui-tab' attribute of every div
    whose 'data-ui-args' attribute matches the tab title pattern.  The
    reference and thumbnail of each Entity are None.
    '''
    page = _soup('/england/men/players')
    title_args = re.compile(r'{ "title": "\w+" }')
    for tab in page.find_all('div', attrs={'data-ui-args': title_args}):
        yield Entity(name=tab['data-ui-tab'], reference=None, thumbnail=None)
+
+
def players(category='Test'):
    '''Generator for the players listed under one category tab.

    `category` selects the div whose 'data-ui-tab' attribute equals it.
    Every 'profile-player-card' section inside that div yields an Entity
    built from its image: 'alt' is the name, 'data-player' forms the
    'cricket_player:<id>' reference, and the thumbnail URL is made from the
    lower-cased category and the player id.
    '''
    page = _soup('/england/men/players')
    tab = page.find('div', attrs={'data-ui-tab': category})
    for card in tab('section', 'profile-player-card'):
        image = card.img
        player_id = image['data-player']
        yield Entity(
            name=image['alt'],
            reference='cricket_player:{}'.format(player_id),
            thumbnail=PLAYER_THUMB_URL_FMT.format(category.lower(), player_id)
        )
-def search_results(term, start=0, size=10):
def _video(video):
    '''Converts one JSON video item into a Video tuple.

    The stream URL is built from the item's 'mediaId'; the thumbnail and the
    date come from _thumbnail_variant() and _date_json() respectively.
    '''
    title = video['title']
    stream_url = HLS_URL_FMT.format(video['mediaId'])
    thumbnail = _thumbnail_variant(video)
    published = _date_json(video)
    # Positional order follows the Video fields:
    # title, url, thumbnail, date, duration.
    return Video(title, stream_url, thumbnail, published, video['duration'])
+
+
def _videos(videos_json):
    '''Generator yielding a Video for each item in the response's 'content' list'''
    items = videos_json['content']
    for item in items:
        yield _video(item)
+
+
def videos(reference=None, page=1, page_size=10):
    '''Fetches one page of videos, optionally filtered by a content reference.

    Returns a tuple of (generator of Video tuples, number of pages), where
    the number of pages is read from the response's pageInfo.numPages.
    '''
    url = _video_list_url(reference, page, page_size)
    payload = requests.get(url).json()
    return _videos(payload), payload['pageInfo']['numPages']
+
+
def _search_results(search_results_json):
    '''Generator yielding a Video for the 'response' of each search hit'''
    hits = search_results_json['hits']['hit']
    for hit in hits:
        yield _video(hit['response'])
+
+
def search_results(term, page=1, page_size=10):
    '''Fetches one page of videos matching a search term.

    Returns a tuple of (generator of Video tuples, number of pages), where
    the number of pages is the reported hit count divided by `page_size`,
    rounded up.
    '''
    response = requests.get(_search_url(term, page, page_size))
    payload = response.json()
    found = float(payload['hits']['found'])
    npages = int(math.ceil(found / page_size))
    return _search_results(payload), npages
-def _print_all_videos():
+def _print_team_videos():
'''Test function to print all categories and videos'''
- for title, path in categories():
- print '{} ({})'.format(title, path)
- for video in videos(path):
+ for team in [england()] + list(counties()):
+ print '{} ({})'.format(team.name, team.reference)
+ videos_page, _num_pages = videos(team.reference)
+ for video in videos_page:
print '\t', video.title
def _print_search_results(term):
'''Test function to print search results'''
print 'Search: {}'.format(term)
- for video in search_results(term):
+ videos_page, _num_pages = search_results(term)
+ for video in videos_page:
print '\t', video.title
if __name__ == '__main__':
    # Manual smoke test: list the videos of every team, print a blank
    # separator line, then run a sample search.
    _print_team_videos()
    print
    _print_search_results('test cricket')