mirror of
https://github.com/nichkara/InfinitumBotty.git
synced 2026-06-11 06:36:24 +02:00
Adding youtube-special-case handling to extract title
This commit is contained in:
@@ -18,12 +18,14 @@ class TitleObserver(PrivMsgObserverPrototype):
|
|||||||
|
|
||||||
def update_on_priv_msg(self, data, connection: Connection):
|
def update_on_priv_msg(self, data, connection: Connection):
|
||||||
regex = "(?P<url>https?://[^\s]+)"
|
regex = "(?P<url>https?://[^\s]+)"
|
||||||
url = re.search(regex, data['messageCaseSensitive'])
|
url = re.search(regex, data["messageCaseSensitive"])
|
||||||
if url is not None:
|
if url is not None:
|
||||||
url = url.group()
|
url = url.group()
|
||||||
print(url)
|
print(url)
|
||||||
try:
|
try:
|
||||||
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'}
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
|
||||||
|
}
|
||||||
|
|
||||||
url = url
|
url = url
|
||||||
req = urllib.request.Request(url, None, headers)
|
req = urllib.request.Request(url, None, headers)
|
||||||
@@ -38,17 +40,32 @@ class TitleObserver(PrivMsgObserverPrototype):
|
|||||||
|
|
||||||
def getTitle(self, resource):
|
def getTitle(self, resource):
|
||||||
encoding = resource.headers.get_content_charset()
|
encoding = resource.headers.get_content_charset()
|
||||||
|
url = resource.geturl()
|
||||||
# der erste Fall kann raus, wenn ein anderer Channel benutzt wird
|
# der erste Fall kann raus, wenn ein anderer Channel benutzt wird
|
||||||
if resource.geturl().find('rehakids.de') != -1:
|
if url.find("rehakids.de") != -1:
|
||||||
encoding = 'windows-1252'
|
encoding = "windows-1252"
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = 'utf-8'
|
encoding = "utf-8"
|
||||||
content = resource.read().decode(encoding, errors='replace')
|
content = resource.read().decode(encoding, errors="replace")
|
||||||
|
|
||||||
|
if re.search("http[s]+://[^/]*youtube.com/", url):
|
||||||
|
title_re = re.compile(
|
||||||
|
'''"results":{"contents":\[{"videoPrimaryInfoRenderer":{"title":{"runs":\[{"text":"([^"]*)"'''
|
||||||
|
)
|
||||||
|
else:
|
||||||
title_re = re.compile("<title>(.+?)</title>")
|
title_re = re.compile("<title>(.+?)</title>")
|
||||||
title = title_re.search(content).group(1)
|
|
||||||
|
title_matches = title_re.search(content)
|
||||||
|
if title_matches:
|
||||||
|
title = title_matches.group(1)
|
||||||
|
else:
|
||||||
|
return "Could not Parse Title"
|
||||||
|
|
||||||
title = html.unescape(title)
|
title = html.unescape(title)
|
||||||
title = title.replace('\n', ' ').replace('\r', '')
|
title = title.replace("\n", " ").replace("\r", "")
|
||||||
title = title.replace("&lt;", "<")
|
title = title.replace("&lt;", "<")
|
||||||
title = title.replace("&gt;", ">")
|
title = title.replace("&gt;", ">")
|
||||||
title = title.replace("&amp;", "&")
|
title = title.replace("&amp;", "&")
|
||||||
|
if title == "":
|
||||||
|
title = "Empty Title"
|
||||||
return title
|
return title
|
||||||
|
|||||||
Reference in New Issue
Block a user