(new feature) 0.2.0 - get an anime info from page

3 years ago · dadf74aa74
parent 2674ed9329
commit dadf74aa74
2 changed files with 53 additions and 0 deletions
--- a/CHANGELOG.org
+++ b/CHANGELOG.org
@ -7,3 +7,5 @@
   New small feature: get an anime entry
 ** 0.1.3 <2022-07-08>
   New small feature: add an anime entry if
+** 0.2.0 <2022-07-09>
+   New feature: get an anime info from page
--- a/src/scraper.py
+++ b/src/scraper.py
@ -0,0 +1,51 @@
+########################################################################## 
+# Copyright (C) 2022 Kostya Klochko <kostya_klochko@ukr.net>             #
+#                                                                        #
+# This file is part of Anitube Simple Notification.                      #
+#                                                                        #
+# Anitube Simple Notification is free software: you can redistribute     #
+# it and/or modify it under the terms of the GNU General Public          #
+# License as published by the Free Software Foundation, either version   #
+# 3 of the License, or (at your option) any later version.               #
+#                                                                        #
+# Anitube Simple Notification is distributed in the hope that it will    #
+# be useful, but WITHOUT ANY WARRANTY; without even the implied          #
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See   #
+# the GNU General Public License for more details.                       #
+#                                                                        #
+# You should have received a copy of the GNU General Public License      #
+# along with Anitube Simple Notification. If not, see                    #
+# <https://www.gnu.org/licenses/>.                                       #
+##########################################################################
+
+"""
+This module provides everything needed to simplify scraping work.
+"""
+
+import requests
+from bs4 import BeautifulSoup
+
+class Scraper:
+    """Fetch pages over HTTP and extract anime info from them."""
+
+    # Seconds to wait for the server before giving up. Without a timeout
+    # requests.get() may block forever on an unresponsive host.
+    TIMEOUT = 30
+
+    def __init__(self, HEADERS):
+        """
+        Store the connection information.
+
+        HEADERS is passed unchanged as the ``headers`` argument of every
+        request made by this instance.
+        """
+        self.HEADERS = HEADERS
+
+    def get_anime(self, url):
+        """
+        Download the page at url and return [url, title, status].
+
+        Return None if the response status is not 200, or if the page
+        does not contain the expected info block with an <h2> title.
+        status is the text that follows the "Серій: " marker up to the
+        end of that line; it is an empty string if the marker is absent.
+
+        Exceptions raised by requests.get (timeouts included) are not
+        caught here and propagate to the caller.
+        """
+        response = requests.get(
+            url, headers=self.HEADERS, timeout=self.TIMEOUT
+        )
+        if response.status_code != 200:
+            return None
+        soup = BeautifulSoup(response.content, 'html.parser')
+        data = soup.find(
+            'div', class_='rcol', style='width:701px; padding:0 0 0 6px;'
+        )
+        if data is None:
+            # The info block is missing (unexpected page layout), so
+            # there is nothing to extract.
+            return None
+        # Getting Title
+        heading = data.find('h2')
+        if heading is None:
+            return None
+        title = heading.get_text(strip=True)
+        # Getting Status
+        # partition() sidesteps the str.find() == -1 pitfalls: a missing
+        # marker no longer yields an arbitrary slice of the page text,
+        # and a status with no trailing newline keeps its last character.
+        _, marker, tail = data.get_text().partition("\nСерій: ")
+        status = tail.partition('\n')[0] if marker else ''
+        return [url, title, status]