ninachloe/FaustBot/Modules/ComicScraper.py

import random
import urllib
import requests
import html

# ComicScraper picks random comics from websites that offer no built-in "random comic" feature. The comic URLs have to be listed in comics.py.
class ComicScraper():
    """Produce a random comic message for sites without a random-comic page.

    The class is used purely as a namespace: every method is static, so it
    can be called either on the class (``ComicScraper.getRandomComic(url)``)
    or on an instance. ``getRandomComic`` is the entry point; it dispatches
    to a site-specific scraper based on the URL.

    Every public method returns a plain string: either the comic message or
    a human-readable explanation of why no comic could be produced.
    """

    # Betamonkeys strip image URLs are built as this prefix + comic number + ".png".
    _BETAMONKEYS_IMAGE_PREFIX = "http://betamonkeys.co.uk/wp-content/stripshow_comics/betamonkeys"

    # Seconds to wait for the comic site. Without a timeout, requests.get()
    # may block the caller indefinitely on a stalled connection.
    _REQUEST_TIMEOUT = 10

    #Scrapers for specific websites follow here:

    @staticmethod
    def _parseLatestBetamonkeysId(page_text):
        """Extract the number of the newest Betamonkeys strip from page HTML.

        Scans *page_text* for strip image URLs (prefix + number + ".png") and
        returns the number of the first one that parses as a positive
        integer. Occurrences of the prefix that are not followed by a number
        and ".png" are skipped.

        Returns:
            The comic number as ``int``, or ``None`` if no usable strip URL
            is present in *page_text*.
        """
        # Everything after the first split piece starts right behind the prefix.
        for chunk in page_text.split(ComicScraper._BETAMONKEYS_IMAGE_PREFIX)[1:]:
            candidate, suffix, _ = chunk.partition(".png")
            if not suffix:
                # No ".png" after the prefix: not a strip image URL.
                continue
            try:
                comic_id = int(candidate)
            except ValueError:
                continue
            # random.randint(1, n) needs n >= 1, so ignore zero/negative ids.
            if comic_id >= 1:
                return comic_id
        return None

    #scraper for Betamonkeys
    @staticmethod
    def scrapeBetamonkeys(url):
        """Return a message containing the URL of a random Betamonkeys strip.

        The page at *url* is fetched to find the number of the latest strip;
        a random number between 1 and that number (inclusive) is then turned
        into an image URL. This relies on the site's image naming scheme.

        Args:
            url: Address of a Betamonkeys page that embeds the latest strip.

        Returns:
            ``"<image url> Betamonkeys <number> | Betamonkeys"`` on success,
            otherwise a message describing why no comic could be determined
            (network failure or no strip URL found in the page).
        """
        try:
            response = requests.get(url, timeout=ComicScraper._REQUEST_TIMEOUT)
        except requests.RequestException as err:
            return "Could not fetch Betamonkeys comic from " + url + ": " + str(err)

        # errors="replace": a stray invalid byte must not abort the scrape,
        # the URL we are looking for is plain ASCII anyway.
        page_text = response.content.decode("utf-8", errors="replace")
        comic_id_latest = ComicScraper._parseLatestBetamonkeysId(page_text)
        if comic_id_latest is None:
            return "Could not determine the latest Betamonkeys comic from " + url

        random_comic_number = str(random.randint(1, comic_id_latest))
        random_comic_url = ComicScraper._BETAMONKEYS_IMAGE_PREFIX + random_comic_number + ".png"
        return random_comic_url + " Betamonkeys " + random_comic_number + " | Betamonkeys"

    #scraper for Nichtlustig
    @staticmethod
    def scrapeNichtlustig(url):
        """Placeholder scraper for Nichtlustig; *url* is currently unused.

        Returns:
            A fixed (German) message stating that no scraper exists yet.
        """
        #TODO: Write a scraper for Nichtlustig!
        return "Bisher kein Scraper für Nichtlustig."

    #your custom scraper here
    #def scrapeYourCustomComic(url):
        #return "Your custom scraped URL"


    #Main scraping function. Takes url, decides scraping method to use.
    @staticmethod
    def getRandomComic(url):
        """Dispatch *url* to the matching site scraper and return its message.

        Args:
            url: Comic site URL; the scraper is selected by the host name
                contained in it.

        Returns:
            The selected scraper's result string, or
            ``"No parser found for comic URL: <url>"`` if no scraper matches.
        """
        if "betamonkeys.co.uk" in url:
            return ComicScraper.scrapeBetamonkeys(url)

        if "nichtlustig.de" in url:
            return ComicScraper.scrapeNichtlustig(url)

        return "No parser found for comic URL: " + url