From b8db5ed4627cf8ae32e94bb15a181cf283e070b7 Mon Sep 17 00:00:00 2001
From: Context 77 <126421199+ctx77@users.noreply.github.com>
Date: Thu, 1 Aug 2024 12:48:03 +0200
Subject: [PATCH 1/5] add parsing for music.youtube, youtube/shorts, security:
Do not parse url with ips/ports
---
FaustBot/Modules/TitleObserver.py | 57 ++++++++++++++++++++-----------
1 file changed, 37 insertions(+), 20 deletions(-)
diff --git a/FaustBot/Modules/TitleObserver.py b/FaustBot/Modules/TitleObserver.py
index 708c105..413842d 100644
--- a/FaustBot/Modules/TitleObserver.py
+++ b/FaustBot/Modules/TitleObserver.py
@@ -23,14 +23,7 @@ class TitleObserver(PrivMsgObserverPrototype):
url = url.group()
print(url)
try:
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
- }
-
- url = url
- req = urllib.request.Request(url, None, headers)
- resource = urllib.request.urlopen(req)
- title = self.getTitle(resource)
+ title = self.getTitle(url)
print(title)
title = title[:350]
connection.send_back(title, data)
@@ -38,28 +31,52 @@ class TitleObserver(PrivMsgObserverPrototype):
print(exc)
pass
- def getTitle(self, resource):
- encoding = resource.headers.get_content_charset()
- url = resource.geturl()
- # der erste Fall kann raus, wenn ein anderer Channel benutzt wird
- if url.find("rehakids.de") != -1:
- encoding = "windows-1252"
- if not encoding:
- encoding = "utf-8"
- content = resource.read().decode(encoding, errors="replace")
+ def getTitle(self, url):
+ headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
+ }
- if re.search("http[s]+://[^/]*youtube.com/", url):
+ if re.search("https?://\\[[^/]*", url):
+ raise (Exception("Refusing to parse bare IPv6 Addresses"))
+ if re.search("https?://[^/:]*:[^/:]*", url):
+ raise (Exception("Refusing to parse URLs with Ports"))
+ if re.search("https?://[0-9]+.[0-9]+.[0-9]+.[^/]*", url):
+ raise (Exception("Refusing to parse bare IPv4 Addresses"))
+ if re.search("https?://music.youtube.com/", url):
+ url = url.replace("music.youtube.com/", "www.youtube.com/", 1)
+
+ if re.search("https?://[^/]*youtube.com/shorts/", url):
+ title_re = re.compile('''"reelPlayerHeaderRenderer":{"reelTitleText":{"runs":\[{"text":"([^"]*)"''')
+ headers["User-Agent"] = "curl/7.81.0"
+ elif re.search("https?://[^/]*youtube.com/", url):
title_re = re.compile(
'''"results":{"contents":\[{"videoPrimaryInfoRenderer":{"title":{"runs":\[{"text":"([^"]*)"'''
)
else:
title_re = re.compile("<title>(.+?)</title>")
+ req = urllib.request.Request(url, None, headers)
+
+ # Keep the urlopen scope as short as possible (connection leaks)
+ with urllib.request.urlopen(req, timeout=10) as response:
+ encoding = response.headers.get_content_charset()
+ content_raw = response.read()
+
+ # der erste Fall kann raus, wenn ein anderer Channel benutzt wird
+ if url.find("rehakids.de") != -1:
+ encoding = "windows-1252"
+ if not encoding:
+ encoding = "utf-8"
+
+ content = content_raw.decode(encoding, errors="replace")
+
title_matches = title_re.search(content)
if title_matches:
title = title_matches.group(1)
else:
- return "Could not Parse Title"
+ #with open("content.html", "w") as file:
+ # file.write(content)
+ raise Exception("Could not Parse Title for {}".format(url))
title = html.unescape(title)
title = title.replace("\n", " ").replace("\r", "")
@@ -67,5 +84,5 @@ class TitleObserver(PrivMsgObserverPrototype):
title = title.replace("&gt;", ">")
title = title.replace("&amp;", "&")
if title == "":
- title = "Empty Title"
+ raise Exception("Empty Title for {}".format(url))
return title
From 0ad43684a8b54005b155dea8c123b307c920b279 Mon Sep 17 00:00:00 2001
From: Context 77 <126421199+ctx77@users.noreply.github.com>
Date: Thu, 1 Aug 2024 12:49:18 +0200
Subject: [PATCH 2/5] fix python-warning about 'is not' use
---
FaustBot/StringBuffer.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/FaustBot/StringBuffer.py b/FaustBot/StringBuffer.py
index 2c783bd..2c93fba 100644
--- a/FaustBot/StringBuffer.py
+++ b/FaustBot/StringBuffer.py
@@ -10,7 +10,7 @@ class StringBuffer:
ready = list()
# Python do-while-loop
idx = self._buffer.find('\n')
- while idx is not -1:
+ while idx != -1:
data = self._buffer[0:idx] #
data = data.strip()
if len(data) >= 1:
From cab4113a86f3b29b2813f191cc12e81a11a12132 Mon Sep 17 00:00:00 2001
From: Context 77 <126421199+ctx77@users.noreply.github.com>
Date: Thu, 1 Aug 2024 13:49:37 +0200
Subject: [PATCH 3/5] fix titles with react attributes
---
FaustBot/Modules/TitleObserver.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/FaustBot/Modules/TitleObserver.py b/FaustBot/Modules/TitleObserver.py
index 413842d..10e91bf 100644
--- a/FaustBot/Modules/TitleObserver.py
+++ b/FaustBot/Modules/TitleObserver.py
@@ -53,7 +53,7 @@ class TitleObserver(PrivMsgObserverPrototype):
'''"results":{"contents":\[{"videoPrimaryInfoRenderer":{"title":{"runs":\[{"text":"([^"]*)"'''
)
else:
- title_re = re.compile("<title>(.+?)</title>")
+ title_re = re.compile("<title[^>]*>(.+?)</title>")
req = urllib.request.Request(url, None, headers)
@@ -74,8 +74,8 @@ class TitleObserver(PrivMsgObserverPrototype):
if title_matches:
title = title_matches.group(1)
else:
- #with open("content.html", "w") as file:
- # file.write(content)
+ # with open("content.html", "w") as file:
+ # file.write(content)
raise Exception("Could not Parse Title for {}".format(url))
title = html.unescape(title)
From e0ab7b41e8cfdcc02a4843fec7c2001d818beeee Mon Sep 17 00:00:00 2001
From: Context 77 <126421199+ctx77@users.noreply.github.com>
Date: Thu, 1 Aug 2024 15:05:54 +0200
Subject: [PATCH 4/5] add schwipschwap
---
getraenkeOnlyGoodOnes.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/getraenkeOnlyGoodOnes.py b/getraenkeOnlyGoodOnes.py
index c8ae1f8..e9f9c64 100644
--- a/getraenkeOnlyGoodOnes.py
+++ b/getraenkeOnlyGoodOnes.py
@@ -32,6 +32,6 @@ getraenkegoodones = ['einen Kaffee in einer Tasse aus blauem Porzellan','eine Li
"einen Apfelkuchen", "einen Pudding-Streuselkuchen", "einen Rhabarberstreuselkuchen",
'ein Glas kalten Rooibos Tee mit Vanille verfeinert','eine bunt karierte Tasse Kaffee', 'einen Rainbow Cocktail in einem hohen Glas mit Schirmchen',
'eine Apfelschorle mit frisch gepresstem Apfelsaft', 'eine bunt gestreifte Tasse Tee',
- 'ein Stilglas mit alkoholfreiem Sekt'
+ 'ein Stilglas mit alkoholfreiem Sekt', 'ein Glas mit erfrischend sprudelndem SchwipSchwap'
]
From b015d1f94ec62f086feba8ef1d614f97f66bcecd Mon Sep 17 00:00:00 2001
From: Context 77 <126421199+ctx77@users.noreply.github.com>
Date: Thu, 1 Aug 2024 23:12:17 +0200
Subject: [PATCH 5/5] remove schrotfl.
---
essen.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/essen.py b/essen.py
index 6598411..fe24843 100644
--- a/essen.py
+++ b/essen.py
@@ -195,7 +195,6 @@ essen = [
'einen winzigen Muffin, der nicht so weich ist, wie er aussieht','ein Blech Papageienkuchen',
'eine schief gewachsene gelbe Paprika','zwei gefüllte rote Paprikas','einen Teller weißen Schichtkohl',
'Jakobsmuscheln mit schwarzem Trüffel und Haselnuss','einen in Miso marinierten und gebackenen schwarzen Kabeljau',
- 'Elmer Fudd mit einer auf dich gerichteten Schrotflinte','Pekingente mit Hoisin-Sauce',
'klassisch in Bierteig frittierte Fish`n Chips','ein Stilleben aus Wachs','eine Schale ungewürztes Erbsenpüree',
'ein gegrilltes Hähnchen mit einer würzigen Tomaten-Curry-Soße','eine kaum Licht spendende Waltranfunsel',
'gedämpfte Miesmuscheln mit knusprigen Pommes','einen Geburtstagskuchen mit zu wenig Kerzen',
@@ -284,6 +283,7 @@ essen = [
'einen dreckigen Deal','ein einschneidendes Erfolgserlebnis','einen Berg fettige Ringelpommes',
'eine meterlange Gurkenspirale','eine Schale fein pürierter Selbstkontrolle','Nougat in Form von Meeresfrüchten',
'einen Topf Matzah-Ball-Suppe','einen Laib geflochtenes Challah','eine XXL-Packung Merci',
+ 'Pekingente mit Hoisin-Sauce',
]