(new feature) 0.2.0 - get an anime info from page

main
KKlochko 3 years ago
parent 2674ed9329
commit dadf74aa74

@ -7,3 +7,5 @@
New small feature: get an anime entry
** 0.1.3 <2022-07-08>
New small feature: add an anime entry if
** 0.2.0 <2022-07-09>
New feature: get anime info from a page

@ -0,0 +1,51 @@
##########################################################################
# Copyright (C) 2022 Kostya Klochko <kostya_klochko@ukr.net> #
# #
# This file is part of Anitube Simple Notification. #
# #
# Anitube Simple Notification is free software: you can redistribute #
# it and/or modify it under the terms of the GNU General Public #
# License as published by the Free Software Foundation, either version #
# 3 of the License, or (at your option) any later version. #
# #
# Anitube Simple Notification is distributed in the hope that it will #
# be useful, but WITHOUT ANY WARRANTY; without even the implied #
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See #
# the GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with Anitube Simple Notification. If not, see #
# <https://www.gnu.org/licenses/>. #
##########################################################################
"""
This module provides helpers that simplify web scraping.
"""
import requests
from bs4 import BeautifulSoup
class Scraper:
    """The handler of web connection.

    Fetches a page over HTTP and extracts the anime title and status
    from its HTML.
    """

    # Text that precedes the status value inside the page's text content.
    _STATUS_MARKER = "\nСерій: "

    def __init__(self, HEADERS, timeout=30):
        """Initialise the connection information.

        HEADERS -- mapping of HTTP headers sent with every request.
        timeout -- seconds to wait for the server before giving up;
                   without it a stalled server would block forever.
        """
        self.HEADERS = HEADERS
        self.timeout = timeout

    @staticmethod
    def _extract_status(text):
        """Return the text between the status marker and the next newline.

        Return an empty string if the marker is absent.  If no newline
        follows the marker, everything after the marker is returned.
        """
        # partition() never yields a -1 index, so a missing marker or a
        # missing trailing newline cannot corrupt the slice boundaries.
        _, marker, rest = text.partition(Scraper._STATUS_MARKER)
        if not marker:
            return ""
        return rest.partition("\n")[0]

    def get_anime(self, url):
        """Fetch *url* and return the anime information found there.

        Return None if the response status is not 200, or if the page
        does not contain the expected content block with an <h2> title.
        Otherwise, return [url, title, status]; status is an empty
        string when the page has no status line.
        """
        response = requests.get(
            url, headers=self.HEADERS, timeout=self.timeout
        )
        if response.status_code != 200:
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        data = soup.find(
            'div', class_='rcol', style='width:701px; padding:0 0 0 6px;'
        )
        # find() returns None when nothing matches; treat such a page
        # like a failed request instead of raising AttributeError.
        if data is None:
            return None

        # Getting Title
        heading = data.find('h2')
        if heading is None:
            return None
        title = heading.get_text(strip=True)

        # Getting Status
        status = self._extract_status(data.get_text())

        return [url, title, status]
Loading…
Cancel
Save