"""Helpers that simplify scraping anime pages."""


class Scraper:
    """Fetch anime pages over HTTP and extract basic information from them."""

    # Text that precedes the episode status in the page's plain text
    # (Ukrainian for "Episodes: ", preceded by a line break).
    _STATUS_MARKER = "\nСерій: "

    def __init__(self, HEADERS):
        """Store the HTTP headers sent with every request.

        Args:
            HEADERS: Header mapping passed unchanged to ``requests.get``.
        """
        self.HEADERS = HEADERS

    @classmethod
    def _extract_status(cls, text):
        """Return the text between the status marker and the end of its line.

        Args:
            text: Plain text of the anime description block.

        Returns:
            The status string, or ``""`` when the marker is not present.
        """
        # partition() never returns -1-style sentinels: a missing marker is
        # reported through an empty separator, and a missing trailing newline
        # simply yields the whole remainder instead of losing a character.
        _, marker, tail = text.partition(cls._STATUS_MARKER)
        if not marker:
            return ""
        return tail.partition("\n")[0]

    def get_anime(self, url, timeout=10):
        """Download an anime page and extract its title and status.

        Args:
            url: Address of the anime page.
            timeout: Seconds to wait for the server before giving up, passed
                to ``requests.get``. Without it a stalled server would block
                the caller indefinitely.

        Returns:
            ``None`` if the response status is not 200 or the page does not
            contain the expected description block with an ``<h2>`` title.
            Otherwise ``[url, title, status]``; ``status`` is ``""`` when the
            page has no status line.

        Raises:
            requests.exceptions.RequestException: Propagated from
                ``requests.get`` on connection errors or timeouts.
        """
        # Imported here so this block brings its own dependencies into scope.
        import requests
        from bs4 import BeautifulSoup

        response = requests.get(url, headers=self.HEADERS, timeout=timeout)
        if response.status_code != 200:
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        data = soup.find('div', class_='rcol',
                         style='width:701px; padding:0 0 0 6px;')
        # find() returns None when nothing matches (layout change, error page
        # served with status 200, ...); treat that like an unavailable page.
        if data is None:
            return None

        heading = data.find('h2')
        if heading is None:
            return None
        title = heading.get_text(strip=True)

        status = self._extract_status(data.get_text())
        return [url, title, status]