Files
ninachloe/FaustBot/Modules/ComicScraper.py
2022-03-31 15:21:47 +02:00

41 lines
1.7 KiB
Python

import random
import urllib
import requests
import html
#Comic scraper scrapes comics from urls that have no website based random functionality. Comic URLs have to be in comics.py
class ComicScraper():
#Scrapers for specific websites follow here:
#scraper for Betamonkeys
def scrapeBetamonkeys(url):
#get latest comic id from the website, then generate a random number within the range of 1 and the latest comic.
#Finally generate a new comic url from that. I know this is dirty - But it works for a comic i guess ;)
r = requests.get(url)
comic_id_latest=r.content.decode("utf-8").split("http://betamonkeys.co.uk/wp-content/stripshow_comics/betamonkeys")[1].split(".png")[0]
random_comic_number=str(random.randint(1,int(comic_id_latest)))
random_comic_url="http://betamonkeys.co.uk/wp-content/stripshow_comics/betamonkeys"+random_comic_number+".png"
return random_comic_url+ " Betamonkeys "+ random_comic_number + " | Betamonkeys"
#scraper for Nichtlustig
def scrapeNichtlustig(url):
#TODO: Write a scraper for Nichtlustig!
return "Bisher kein Scraper für Nichtlustig."
#your custom scraper here
#def scrapeYourCustomComic(url):
#return "Your custom scraped URL"
#Main scraping function. Takes url, decides scraping method to use. If no scraping method is found: return "No parser found"
def getRandomComic(url):
if "betamonkeys.co.uk" in url:
return ComicScraper.scrapeBetamonkeys(url)
if "nichtlustig.de" in url:
return ComicScraper.scrapeNichtlustig(url)
else:
return "No parser found for comic URL: "+url