From 832cb746454e5fe3c4ddd6f8a3645d41539b713b Mon Sep 17 00:00:00 2001 From: Context 77 <126421199+ctx77@users.noreply.github.com> Date: Thu, 1 Aug 2024 02:33:04 +0200 Subject: [PATCH] Adding youtube-special-case handling to extract title --- FaustBot/Modules/TitleObserver.py | 51 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/FaustBot/Modules/TitleObserver.py b/FaustBot/Modules/TitleObserver.py index becb665..708c105 100644 --- a/FaustBot/Modules/TitleObserver.py +++ b/FaustBot/Modules/TitleObserver.py @@ -18,12 +18,14 @@ class TitleObserver(PrivMsgObserverPrototype): def update_on_priv_msg(self, data, connection: Connection): regex = "(?Phttps?://[^\s]+)" - url = re.search(regex, data['messageCaseSensitive']) + url = re.search(regex, data["messageCaseSensitive"]) if url is not None: url = url.group() print(url) try: - headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'} + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36" + } url = url req = urllib.request.Request(url, None, headers) @@ -35,20 +37,35 @@ class TitleObserver(PrivMsgObserverPrototype): except Exception as exc: print(exc) pass - + def getTitle(self, resource): - encoding = resource.headers.get_content_charset() - # der erste Fall kann raus, wenn ein anderer Channel benutzt wird - if resource.geturl().find('rehakids.de') != -1: - encoding = 'windows-1252' - if not encoding: - encoding = 'utf-8' - content = resource.read().decode(encoding, errors='replace') + encoding = resource.headers.get_content_charset() + url = resource.geturl() + # der erste Fall kann raus, wenn ein anderer Channel benutzt wird + if url.find("rehakids.de") != -1: + encoding = "windows-1252" + if not encoding: + encoding = "utf-8" + content = resource.read().decode(encoding, errors="replace") + + if re.search("http[s]+://[^/]*youtube.com/", url): + title_re = re.compile( + '''"results":{"contents":\[{"videoPrimaryInfoRenderer":{"title":{"runs":\[{"text":"([^"]*)"''' + ) + else: title_re = re.compile("(.+?)") - title = title_re.search(content).group(1) - title = html.unescape(title) - title = title.replace('\n', ' ').replace('\r', '') - title = title.replace("<", "<") - title = title.replace(">", ">") - title = title.replace("&", "&") - return title + + title_matches = title_re.search(content) + if title_matches: + title = title_matches.group(1) + else: + return "Could not Parse Title" + + title = html.unescape(title) + title = title.replace("\n", " ").replace("\r", "") + title = title.replace("<", "<") + title = title.replace(">", ">") + title = title.replace("&", "&") + if title == "": + title = "Empty Title" + return title