parent
2674ed9329
commit
dadf74aa74
@ -0,0 +1,51 @@
|
||||
##########################################################################
|
||||
# Copyright (C) 2022 Kostya Klochko <kostya_klochko@ukr.net> #
|
||||
# #
|
||||
# This file is part of Anitube Simple Notification. #
|
||||
# #
|
||||
# Anitube Simple Notification is free software: you can redistribute #
|
||||
# it and/or modify it under the terms of the GNU General Public #
|
||||
# License as published by the Free Software Foundation, either version #
|
||||
# 3 of the License, or (at your option) any later version. #
|
||||
# #
|
||||
# Anitube Simple Notification is distributed in the hope that it will #
|
||||
# be useful, but WITHOUT ANY WARRANTY; without even the implied #
|
||||
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See #
|
||||
# the GNU General Public License for more details. #
|
||||
# #
|
||||
# You should have received a copy of the GNU General Public License #
|
||||
# along with Anitube Simple Notification. If not, see #
|
||||
# <https://www.gnu.org/licenses/>. #
|
||||
##########################################################################
|
||||
|
||||
"""
|
||||
This module provides helpers that simplify web scraping.
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
class Scraper:
    """Fetch an anime page over HTTP and extract its title and status."""

    # Text that precedes the status value in the page's plain text
    # (Ukrainian, roughly "Episodes: ").
    _STATUS_MARKER = "\nСерій: "

    def __init__(self, HEADERS):
        """Store the connection information.

        Args:
            HEADERS: Value passed as ``headers=`` on every ``requests.get``
                call made by this object.
        """
        self.HEADERS = HEADERS

    @classmethod
    def _extract_status(cls, text):
        """Return the text between the status marker and the next newline.

        Returns an empty string when the marker does not occur in *text*.
        When no newline follows the marker, everything after the marker is
        returned.
        """
        # ``partition`` reports whether the marker was found, unlike
        # ``str.find`` whose -1 result would silently produce a bogus slice.
        _, found, tail = text.partition(cls._STATUS_MARKER)
        if not found:
            return ""
        return tail.partition("\n")[0]

    def get_anime(self, url, timeout=30):
        """Download *url* and scrape the title and status from it.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before ``requests``
                gives up; prevents the call from blocking indefinitely.

        Returns:
            ``None`` if the response status is not 200 or the expected
            container/heading is missing from the page.
            Otherwise ``[url, title, status]``; ``status`` is an empty
            string when the status marker is not present.
        """
        response = requests.get(url, headers=self.HEADERS, timeout=timeout)
        if response.status_code != 200:
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        data = soup.find(
            'div',
            class_='rcol',
            style='width:701px; padding:0 0 0 6px;',
        )
        # ``find`` returns None when nothing matches; treat an unexpected
        # page layout as "no data" instead of raising AttributeError.
        if data is None:
            return None

        # Getting Title
        heading = data.find('h2')
        if heading is None:
            return None
        title = heading.get_text(strip=True)

        # Getting Status
        status = self._extract_status(data.get_text())

        return [url, title, status]
|
Loading…
Reference in new issue