diff options
author | Christian Kölpin <raptor2101@mykolab.com> | 2017-04-05 00:29:40 +0200 |
---|---|---|
committer | enen92 <enen92@users.noreply.github.com> | 2017-04-04 23:29:40 +0100 |
commit | fb03a9c73debfb78ae6a3717b4677cd455a4c37b (patch) | |
tree | 5b6117aafab9698d7861306ae9c9dcd091d96e24 /plugin.video.mediathek | |
parent | 712d35b06c97feb7c9342c792243126104ac3daa (diff) |
[plugin.video.mediathek] 0.8.2 (#1106)
Diffstat (limited to 'plugin.video.mediathek')
-rw-r--r-- | plugin.video.mediathek/addon.xml | 2 | ||||
-rw-r--r-- | plugin.video.mediathek/changelog.txt | 1 | ||||
-rw-r--r-- | plugin.video.mediathek/mediathek/arte.py | 10 | ||||
-rw-r--r-- | plugin.video.mediathek/mediathek/orf.py | 172 | ||||
-rw-r--r-- | plugin.video.mediathek/simplexbmc.py | 2 |
5 files changed, 65 insertions, 122 deletions
diff --git a/plugin.video.mediathek/addon.xml b/plugin.video.mediathek/addon.xml index da032f8..abd64e4 100644 --- a/plugin.video.mediathek/addon.xml +++ b/plugin.video.mediathek/addon.xml @@ -2,7 +2,7 @@ <addon id="plugin.video.mediathek" name="Mediathek" - version="0.8.1" + version="0.8.2" provider-name="Raptor 2101"> <requires> <import addon="xbmc.python" version="2.25.0"/> diff --git a/plugin.video.mediathek/changelog.txt b/plugin.video.mediathek/changelog.txt index 77edc82..4d5387b 100644 --- a/plugin.video.mediathek/changelog.txt +++ b/plugin.video.mediathek/changelog.txt @@ -1,3 +1,4 @@ +0.8.2 - CHG: Reimplement ORF 0.8.1 - CHG: Recover ARTE Search and Browsing Functionality 0.8.0 - CHG: Version pump to seperate pre-krypton version and krypton version 0.7.5 - CHG: reimplement ZDF Mediathak (important: ZDF enforces TLSv1 with SNI enabled, what is only supported by python 2.7.9.) diff --git a/plugin.video.mediathek/mediathek/arte.py b/plugin.video.mediathek/mediathek/arte.py index ac3fec6..4c61c4e 100644 --- a/plugin.video.mediathek/mediathek/arte.py +++ b/plugin.video.mediathek/mediathek/arte.py @@ -89,7 +89,7 @@ class ARTEMediathek(Mediathek): link = self.serachLink%searchText; pageContent = self.loadPage(link).decode('UTF-8'); content = self.searchContent.search(pageContent).group(1); - content = BeautifulSoup(content); + content = BeautifulSoup(content,"html.parser"); jsonContent = json.loads(content.prettify(formatter=None)); linkCount = len(jsonContent["programs"]); for jsonObject in jsonContent["programs"]: @@ -138,7 +138,7 @@ class ARTEMediathek(Mediathek): for name,regex in self.categories.iteritems(): match = regex.search(pageContent); if(match is not None): - content = BeautifulSoup(match.group(1)); + content = BeautifulSoup(match.group(1),"html.parser"); jsonContent = json.loads(content.prettify(formatter=None)) if(isinstance(jsonContent,list)): self.buildJsonLink(name,jsonContent) @@ -154,7 +154,7 @@ class ARTEMediathek(Mediathek): match = 
regex.search(htmlPage); if(match is not None): someMatch = True; - content = BeautifulSoup(match.group(1)); + content = BeautifulSoup(match.group(1),"html.parser"); self.gui.log(content.prettify(formatter=None)); jsonContent = json.loads(content.prettify(formatter=None)) self.extractVideoLinksFromJson(jsonContent) @@ -167,14 +167,14 @@ class ARTEMediathek(Mediathek): def showCluster(self): pageContent = self.loadPage(self.basePage).decode('UTF-8'); - content = BeautifulSoup(self.regex_cluster.search(pageContent).group(1)); + content = BeautifulSoup(self.regex_cluster.search(pageContent).group(1),"html.parser"); jsonContent = json.loads(content.prettify(formatter=None)) for menuItem in jsonContent: self.buildMenuEntry(menuItem); def showCategories(self): pageContent = self.loadPage(self.basePage).decode('UTF-8'); - content = BeautifulSoup(self.regex_categories.search(pageContent).group(1)); + content = BeautifulSoup(self.regex_categories.search(pageContent).group(1),"html.parser"); jsonContent = json.loads(content.prettify(formatter=None)) for jsonObject in jsonContent: jsonCategorie = jsonObject["category"] diff --git a/plugin.video.mediathek/mediathek/orf.py b/plugin.video.mediathek/mediathek/orf.py index 1146abf..ef7df1b 100644 --- a/plugin.video.mediathek/mediathek/orf.py +++ b/plugin.video.mediathek/mediathek/orf.py @@ -15,10 +15,9 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
-import re,time,urllib -from xml.dom import Node; -from xml.dom import minidom; +import re,json,urllib; from mediathek import * +from bs4 import BeautifulSoup; class ORFMediathek(Mediathek): def __init__(self, simpleXbmcGui): @@ -27,44 +26,19 @@ class ORFMediathek(Mediathek): self.gui = simpleXbmcGui; self.menuTree = []; self.menuTree.append(TreeNode("0","Startseite","http://tvthek.orf.at/",True)); + self.menuTree.append(TreeNode("1","Sendungen","http://tvthek.orf.at/profiles/a-z",True)); - menuPage = self.loadPage(self.rootLink+"/programs"); - findMenuLink = re.compile("<li><a href=\"(/programs/.*?)\" title=\".*?\">(.*?)</a></li>"); - findCategorie = re.compile("<h4>(.*?)</h4>\\s*?<ul>((\\s*?%s\\s*?)+)</ul>"%findMenuLink.pattern) - categories = []; - for categorieMatch in findCategorie.finditer(menuPage): - title = categorieMatch.group(1); - items = []; - for menuMatch in findMenuLink.finditer(categorieMatch.group(2)): - items.append(TreeNode("1.%d.%d"%(len(categories),len(items)), menuMatch.group(2),"%s%s"%(self.rootLink,menuMatch.group(1)),True)); - categories.append(TreeNode("1.%d"%len(categories), title,"",False,items)); + self.searchLink = "http://tvthek.orf.at/search?q=%s" - self.menuTree.append(TreeNode("1","Sendungen","",False,categories)); - videoLinkPage = "/programs/.*" - imageLink = "http://tvthek.orf.at/assets/.*?.jpeg" - self.regex_extractVideoPageLink = re.compile(videoLinkPage+"?\""); - self.regex_extractImageLink = re.compile(imageLink); - self.regex_extractTitle = re.compile("<strong>.*<span"); - self.regex_extractVideoLink = re.compile("/programs/.*.asx"); - self.regex_extractVideoObject = re.compile("<a href=\""+videoLinkPage+"\" title=\".*\">\\s*<span class=\"spcr\">\\s*<img src=\""+imageLink+"\" title=\".*\" alt=\".*\" />\\s*<span class=\".*\"></span>\\s*<strong>.*<span class=\"nowrap duration\">.*</span></strong>\\s*<span class=\"desc\">.*</span>\\s*</span>\\s*</a>"); + self.regex_extractProfileSites = re.compile("<a class=\"item_inner 
clearfix\"\s*?href=\"(http://tvthek.orf.at/profile/.*?/\d+)\".*src=\"(http://api-tvthek.orf.at/uploads/media/profiles/.*?_profiles_list.jpeg)\"(.|\s)*?<h4 class=\"item_title\">(.*?)</h4>"); + self.regex_extractTopicSites = re.compile("<a href=\"(http://tvthek.orf.at/topic/.*?/\d+)\"\s*?title=\"(.*?)\"\s*?class=\"more"); + self.regex_extractVideoPages = re.compile("<a href=\"(http://tvthek.orf.at/.*?/\d+)\""); + self.regex_extractJson = re.compile("data-jsb=\"({&quot;videoplayer_id&quot;.*})\">"); - self.regex_extractSearchObject = re.compile("<li class=\"clearfix\">\\s*<a href=\".*\" title=\".*\" class=\".*\"><img src=\".*\" alt=\".*\" /><span class=\"btn_play\">.*</span></a>\\s*<p>.*</p>\\s*<h4><a href=\".*\" title=\".*\">.*</a></h4>\\s*<p><a href=\".*\" title=\".*\"></a></p>\\s*</li>"); - - self.regex_extractProgrammLink = re.compile("/programs/.*?\""); - self.regex_extractProgrammTitle = re.compile("title=\".*?\""); - self.regex_extractProgrammPicture = re.compile("/binaries/asset/segments/\\d*/image1"); - - self.regex_extractFlashVars = re.compile("ORF.flashXML = '.*?'"); - self.regex_extractHiddenDate = re.compile("\d{4}-\d{2}-\d{2}"); - self.regex_extractXML = re.compile("%3C.*%3E"); - self.regex_extractReferingSites = re.compile("<li><a href=\"/programs/\d+.*?/episodes/\d+.*?\""); - - self.replace_html = re.compile("<.*?>"); - self.searchLink = "http://tvthek.orf.at/search?q=" @classmethod def name(self): return "ORF"; @@ -72,91 +46,59 @@ class ORFMediathek(Mediathek): def isSearchable(self): return True; - def createVideoLink(self,title,image,videoPageLink,elementCount): - videoPage = self.loadPage(self.rootLink+videoPageLink); - - videoLink = self.regex_extractVideoLink.search(videoPage); - if(videoLink == None): - return; - - simpleLink = SimpleLink(self.rootLink+videoLink.group(), 0); - videoLink = {0:simpleLink}; - counter = 0 - playlist = self.loadPage(simpleLink.basePath); - for line in playlist: - counter+=1; - - if(counter == 1): - 
self.gui.buildVideoLink(DisplayObject(title,"",image,"",videoLink, True, time.gmtime()),self,elementCount); - else: - self.gui.buildVideoLink(DisplayObject(title,"",image,"",videoLink, "PlayList", time.gmtime()),self,elementCount); - def searchVideo(self, searchText): - link = self.searchLink = "http://tvthek.orf.at/search?q="+searchText; - mainPage = self.loadPage(link); - result = self.regex_extractSearchObject.findall(mainPage); - for searchObject in result: - videoLink = self.regex_extractProgrammLink.search(searchObject).group().replace("\"",""); - title = self.regex_extractProgrammTitle.search(searchObject).group().replace("title=\"","").replace("\"",""); - title = title.decode("UTF-8"); - pictureLink = self.regex_extractProgrammPicture.search(searchObject).group(); - - print videoLink; + self.buildPageMenu(self.searchLink%urllib.quote(searchText.encode('UTF-8')),0); - self.createVideoLink(title,pictureLink,videoLink, len(result)); - - def extractLinksFromFlashXml(self, flashXml, date, elementCount): - print flashXml.toprettyxml().encode('UTF-8'); - playlistNode = flashXml.getElementsByTagName("Playlist")[0]; - linkNode=flashXml.getElementsByTagName("AsxUrl")[0]; - link=linkNode.firstChild.data; - asxLink = SimpleLink(self.rootLink+link,0); - videoLink = {0:asxLink}; - for videoItem in playlistNode.getElementsByTagName("Items")[0].childNodes: - if(videoItem.nodeType == Node.ELEMENT_NODE): - titleNode=videoItem.getElementsByTagName("Title")[0]; - - descriptionNode=videoItem.getElementsByTagName("Description")[0]; - title=titleNode.firstChild.data; - - stringArray = link.split("mp4:"); + def extractVideoLinks(self,videoPageLinks,elementCount): + for videoPageLink in videoPageLinks: + videoPage = self.loadPage(videoPageLink.group(1)); + jsonContent = self.regex_extractJson.search(videoPage); + if(jsonContent == None): + return; + jsonContent = jsonContent.group(1); + jsonContent = BeautifulSoup(jsonContent,"html.parser"); + jsonContent = 
json.loads(jsonContent.prettify(formatter=None).encode('UTF-8')); + jsonContent = jsonContent["selected_video"]; + title = jsonContent["title"]; + pictureLink = jsonContent["preview_image_url"]; + + videoLinks={}; + + for source in jsonContent["sources"]: + if(source["protocol"] == "http"): + quality = source["quality"]; + url = source["src"]; + if(quality == "Q1A"): + videoLinks[0] = SimpleLink(url, -1); + if(quality == "Q4A"): + videoLinks[1] = SimpleLink(url, -1); + if(quality == "Q6A"): + videoLinks[2] = SimpleLink(url, -1); + if(quality == "Q8C"): + videoLinks[3] = SimpleLink(url, -1); + if("title_separator" in jsonContent): + titleSeperator = jsonContent["title_separator"]; + titleArray = title.split(titleSeperator); try: - description=descriptionNode.firstChild.data; - except: - description=""; - self.gui.buildVideoLink(DisplayObject(title,"","",description,videoLink, True, date),self,elementCount); - def extractFlashLinks(self, flashVars,videoPageLinks,elementCount): - for flashVar in flashVars: - encodedXML = self.regex_extractXML.search(flashVar).group(); - dateString = self.regex_extractHiddenDate.search(flashVar).group(); - date = time.strptime(dateString,"%Y-%m-%d"); - parsedXML = minidom.parseString(urllib.unquote(encodedXML)); - self.extractLinksFromFlashXml(parsedXML, date,elementCount); - for videoPageLink in videoPageLinks: - videoPageLink = self.rootLink+videoPageLink.replace("<li><a href=\"","").replace("\"",""); - print videoPageLink; - videoPage = self.loadPage(videoPageLink); - flashVars = self.regex_extractFlashVars.findall(videoPage); - for flashVar in flashVars: - encodedXML = self.regex_extractXML.search(flashVar).group(); - dateString = self.regex_extractHiddenDate.search(flashVar).group(); - date = time.strptime(dateString,"%Y-%m-%d"); - parsedXML = minidom.parseString(urllib.unquote(encodedXML)); - self.extractLinksFromFlashXml(parsedXML,date,elementCount); + title = titleArray[0]; + subTitle = titleArray[1]; + except IndexError: + 
subTitle = ""; + self.gui.buildVideoLink(DisplayObject(title,subTitle,pictureLink,"",videoLinks, True, None),self,elementCount); + else: + self.gui.buildVideoLink(DisplayObject(title,None,pictureLink,"",videoLinks, True, None),self,elementCount); + def buildPageMenu(self, link, initCount): mainPage = self.loadPage(link); - videoPageLinks = self.regex_extractReferingSites.findall(mainPage); - flashVars = self.regex_extractFlashVars.findall(mainPage); - links = self.regex_extractVideoObject.findall(mainPage); - elementCount = initCount + len(links)+len(flashVars)+len(videoPageLinks); - self.extractFlashLinks(flashVars,videoPageLinks,elementCount); - for linkObject in links: - - videoLink = self.regex_extractVideoPageLink.search(linkObject).group().replace("\"",""); - image = self.regex_extractImageLink.search(linkObject).group(); - title = self.regex_extractTitle.search(linkObject).group().decode('UTF8'); - title = self.replace_html.sub("", title); - title = title.replace(" <span",""); - self.createVideoLink(title,image,videoLink, elementCount); + + for topic in self.regex_extractTopicSites.finditer(mainPage): + self.gui.buildVideoLink(DisplayObject(topic.group(2),None,None,"",topic.group(1), False, None),self,0); + initCount=initCount+1; + for profile in self.regex_extractProfileSites.finditer(mainPage): + self.gui.buildVideoLink(DisplayObject(profile.group(4),None,profile.group(2),"",profile.group(1), False, None),self,0); + initCount=initCount+1; + videoPageLinks = list(self.regex_extractVideoPages.finditer(mainPage)); + + self.extractVideoLinks(videoPageLinks,len(videoPageLinks)+initCount); diff --git a/plugin.video.mediathek/simplexbmc.py b/plugin.video.mediathek/simplexbmc.py index e55d0d8..1529b83 100644 --- a/plugin.video.mediathek/simplexbmc.py +++ b/plugin.video.mediathek/simplexbmc.py @@ -99,7 +99,7 @@ class SimpleXbmcGui(object): return metaData; def transformHtmlCodes(self, content): - return BeautifulSoup(content).prettify(formatter=None); + return 
BeautifulSoup(content,"html.parser").prettify(formatter=None); def buildMenuLink(self,menuObject,mediathek,objectCount): title = menuObject.name; |