mirror of
https://github.com/nichkara/InfinitumBotty.git
synced 2026-06-10 22:26:23 +02:00
41 lines
1.7 KiB
Python
41 lines
1.7 KiB
Python
import random
|
|
import urllib
|
|
import requests
|
|
import html
|
|
|
|
# ComicScraper picks a random comic from sites that offer no random-comic feature of their own. Comic URLs have to be listed in comics.py.
|
|
class ComicScraper():
    """Namespace of scrapers for comic sites that lack a random-comic feature.

    Every scraper is a static method that takes the comic's URL and returns
    a string. ``getRandomComic`` is the entry point: it selects the scraper
    whose site name occurs in the URL.
    """

    # Seconds to wait for the comic site before giving up; without a timeout
    # ``requests.get`` may block forever on an unresponsive server.
    _REQUEST_TIMEOUT = 10

    # The Betamonkeys scraper assumes strip images are addressed as
    # <prefix><comic number>.png.
    _BETAMONKEYS_IMAGE_PREFIX = (
        "http://betamonkeys.co.uk/wp-content/stripshow_comics/betamonkeys"
    )

    # Scrapers for specific websites follow here:

    @staticmethod
    def _parseLatestBetamonkeysId(page):
        """Return the comic number of the first Betamonkeys image URL in *page*.

        Args:
            page: Decoded HTML text of the Betamonkeys page.

        Raises:
            IndexError: *page* contains no Betamonkeys image URL.
            ValueError: The text between the image prefix and ".png" is not
                an integer.
        """
        parts = page.split(ComicScraper._BETAMONKEYS_IMAGE_PREFIX, 2)
        if len(parts) < 2:
            # Same exception type the unchecked ``[1]`` lookup used to raise,
            # but with a message that says what went wrong.
            raise IndexError("no Betamonkeys comic image URL found in page")
        return int(parts[1].partition(".png")[0])

    # Scraper for Betamonkeys
    @staticmethod
    def scrapeBetamonkeys(url):
        """Return a random Betamonkeys comic as "<image url> Betamonkeys <n> | Betamonkeys".

        The page at *url* is fetched to find the number of the comic shown
        there, which is treated as the latest one. A random number between 1
        and that number is drawn and turned into an image URL. This is
        admittedly a hack, but good enough for a comic.

        Args:
            url: Address of the Betamonkeys page to read the latest comic from.

        Raises:
            IndexError: The page contains no Betamonkeys image URL.
            ValueError: The comic number on the page is not a positive integer.
            requests.exceptions.RequestException: The page could not be
                fetched (including a timeout).
        """
        response = requests.get(url, timeout=ComicScraper._REQUEST_TIMEOUT)
        # The marker being searched for is plain ASCII, so undecodable bytes
        # elsewhere in the page must not abort the scrape.
        page = response.content.decode("utf-8", errors="replace")
        latest = ComicScraper._parseLatestBetamonkeysId(page)
        number = str(random.randint(1, latest))
        comic_url = ComicScraper._BETAMONKEYS_IMAGE_PREFIX + number + ".png"
        return comic_url + " Betamonkeys " + number + " | Betamonkeys"

    # Scraper for Nichtlustig
    @staticmethod
    def scrapeNichtlustig(url):
        """Return a placeholder message; *url* is currently ignored."""
        # TODO: Write a scraper for Nichtlustig!
        return "Bisher kein Scraper für Nichtlustig."

    # Your custom scraper goes here, for example:
    #
    # @staticmethod
    # def scrapeYourCustomComic(url):
    #     return "Your custom scraped URL"

    @staticmethod
    def getRandomComic(url):
        """Dispatch *url* to the matching scraper and return its result.

        Args:
            url: Comic URL; the scraper is chosen by the site name it contains.

        Returns:
            The chosen scraper's string, or a "No parser found" message when
            no scraper matches *url*.
        """
        if "betamonkeys.co.uk" in url:
            return ComicScraper.scrapeBetamonkeys(url)
        if "nichtlustig.de" in url:
            return ComicScraper.scrapeNichtlustig(url)
        return "No parser found for comic URL: " + url


# Module-level alias so that both ``ComicScraper.getRandomComic(url)`` and a
# bare ``getRandomComic(url)`` resolve to the same function.
getRandomComic = ComicScraper.getRandomComic