diff options
Diffstat (limited to 'plugin.video.ecbtv/resources/lib/api.py')
-rw-r--r-- | plugin.video.ecbtv/resources/lib/api.py | 152 |
1 files changed, 105 insertions, 47 deletions
diff --git a/plugin.video.ecbtv/resources/lib/api.py b/plugin.video.ecbtv/resources/lib/api.py index 6f57dcd..c8072c8 100644 --- a/plugin.video.ecbtv/resources/lib/api.py +++ b/plugin.video.ecbtv/resources/lib/api.py @@ -28,38 +28,55 @@ Module for extracting video links from the England and Wales Cricket Board website ''' -import json import os +import re from urlparse import urljoin, urlparse, urlunparse from urllib import urlencode from datetime import datetime import time from collections import namedtuple +import math import requests from bs4 import BeautifulSoup -HOST = 'http://www.ecb.co.uk' -BASE_URL = urljoin(HOST, 'tv/') + +BASE_URL = 'http://www.ecb.co.uk/' HLS_HOST = 'https://secure.brightcove.com/' HLS_URL_FMT = urljoin(HLS_HOST, 'services/mobile/streaming/index/master.m3u8?videoId={}') -SEARCH_URL = 'https://content-ecb.pulselive.com/search/ecb/' +PLAYER_THUMB_URL_FMT = 'https://ecb-resources.s3.amazonaws.com/player-photos/{}/480x480/{}.png' +SEARCH_URL = 'https://content-ecb.pulselive.com/search/ecb/' +VIDEO_LIST_URL = 'https://content-ecb.pulselive.com/content/ecb/EN/' Video = namedtuple('Video', 'title url thumbnail date duration') +Entity = namedtuple('Entity', 'name reference thumbnail') + + +def _video_list_url(reference, page, page_size=10): + '''Returns a URL for a list of videos''' + url_parts = list(urlparse(VIDEO_LIST_URL)) + query_params = dict( + contentTypes='video', + references=reference if reference is not None else '', + page=page - 1, + pageSize=page_size + ) + url_parts[4] = urlencode(query_params) + return urlunparse(url_parts) -def _search_url(term, start, size): +def _search_url(term, page, page_size=10): '''Returns a URL for the JSON search api''' url_parts = list(urlparse(SEARCH_URL)) query_params = dict( type='VIDEO', fullObjectResponse=True, terms=term, - size=size, - start=start + size=page_size, + start=(page - 1) * page_size ) url_parts[4] = urlencode(query_params) return urlunparse(url_parts) @@ -78,12 +95,6 @@ def _date_from_str(date_str, fmt='%d %B %Y'): return datetime(*(time.strptime(date_str, fmt)[0:6])).date() -def _date(media_item): - '''Returns a date object from the HTML media item.''' - date_str = media_item.find('time', 'media__sub-meta').string - return _date_from_str(date_str) - - def _date_json(json_item): '''Returns a date object from the JSON item. The date can be one of two formats''' @@ -98,60 +109,107 @@ def _date_json(json_item): raise exc -def categories(): - '''Generator for category names and links, excluding all that appear before Home''' - start = False - for submenu_link in _soup()('a', 'submenu__link'): - title = submenu_link.string.strip() - if start and title != 'All Categories': - yield title, os.path.basename(submenu_link['href']) - if title == 'Home': - start = True +def _thumbnail_variant(video): + if video['thumbnail'] is None: + return + return (variant['url'] for variant in video['thumbnail']['variants'] + if variant['tag']['id'] == 981).next() -def videos(path): - '''Generator for all videos from a particular page''' - for media_item in _soup(path)('a', 'media__item'): - video = json.loads(media_item['data-ui-args']) - yield Video( - title=media_item.find('span', 'media__title').string, - url=HLS_URL_FMT.format(video['mediaId']), - thumbnail=media_item.picture.img['data-highres-img'], - date=_date(media_item), - duration=int(video['duration'].replace(',', '')) +def england(): + return Entity( + name='England', + reference='cricket_team:11', + thumbnail=None + ) + + +def counties(): + for county in _soup('/county-championship/teams')('div', 'partners__item'): + team_id = int(os.path.basename(county.a['href'])) + yield Entity( + name=county.a.text, + reference='cricket_team:{}'.format(team_id), + thumbnail=county.img['src'] + ) + + +def player_categories(): + for tab in _soup('/england/men/players').find_all( + 'div', attrs={'data-ui-args': re.compile(r'{ "title": "\w+" }')}): + yield Entity( + name=tab['data-ui-tab'], + reference=None, + thumbnail=None + ) + + +def players(category='Test'): + soup = _soup('/england/men/players').find('div', attrs={'data-ui-tab': category}) + for player in soup('section', 'profile-player-card'): + player_id = player.img['data-player'] + yield Entity( + name=player.img['alt'], + reference='cricket_player:{}'.format(player_id), + thumbnail=PLAYER_THUMB_URL_FMT.format(category.lower(), player_id) ) -def search_results(term, start=0, size=10): +def _video(video): + return Video( + title=video['title'], + url=HLS_URL_FMT.format(video['mediaId']), + thumbnail=_thumbnail_variant(video), + date=_date_json(video), + duration=video['duration'] + ) + + +def _videos(videos_json): + '''Generator for all videos from a particular page''' + for video in videos_json['content']: + yield _video(video) + + +def videos(reference=None, page=1, page_size=10): + videos_json = requests.get(_video_list_url(reference, page, page_size)).json() + npages = videos_json['pageInfo']['numPages'] + return _videos(videos_json), npages + + +def _search_results(search_results_json): '''Generator for videos matching a search term''' - results = requests.get(_search_url(term, start, size)).json()['hits']['hit'] + results = search_results_json['hits']['hit'] for result in results: video = result['response'] - yield Video( - title=video['title'], - url=HLS_URL_FMT.format(video['mediaId']), - thumbnail=video['imageUrl'], - date=_date_json(video), - duration=video['duration'] - ) + yield _video(video) + + +def search_results(term, page=1, page_size=10): + search_results_json = requests.get(_search_url(term, page, page_size)).json() + total = search_results_json['hits']['found'] + npages = int(math.ceil(float(total) / page_size)) + return _search_results(search_results_json), npages -def _print_all_videos(): +def _print_team_videos(): '''Test function to print all categories and videos''' - for title, path in categories(): - print '{} ({})'.format(title, path) - for video in videos(path): + for team in [england()] + list(counties()): + print '{} ({})'.format(team.name, team.reference) + videos_page, _num_pages = videos(team.reference) + for video in videos_page: print '\t', video.title def _print_search_results(term): '''Test function to print search results''' print 'Search: {}'.format(term) - for video in search_results(term): + videos_page, _num_pages = search_results(term) + for video in videos_page: print '\t', video.title if __name__ == '__main__': - _print_all_videos() + _print_team_videos() print _print_search_results('test cricket') |