Web Scraping
Made file and path handling a little cleaner with pathlib.
from bs4 import BeautifulSoup
import requests
import shutil
import os
from pathlib import Path


def get_codewars_stats(username):
    """Scrape and return the Codewars stats of the given username.

    Downloads the user's avatar to CodeWars/<username>.jpg and writes the
    stats to CodeWars/codewars_stats.txt (both under the current working
    directory), then returns a formatted summary string. On any non-200
    response, returns an error message instead.
    """
    output = f'{username}\'s Codewars stats:\n'
    source = requests.get(f'https://www.codewars.com/users/{username}', stream=True)
    # Verify request status. Using 404 would miss a wide range of other
    # failed connections, so require an explicit 200.
    if source.status_code == 200:
        soup = BeautifulSoup(source.text, 'html.parser')
        stat_info = soup.findAll('div', class_='stat')
        # Index 5 is deliberately skipped (an unwanted entry among the
        # stat divs on the profile page).
        important_values = [info.text for info in stat_info[:5] + stat_info[6:]]
        # Extract the URL of the user's avatar/profile pic from the first
        # <figure>'s <img> tag.
        img_url = ''.join(
            [el for el in str(soup.findAll('figure')[0].findNext('img')).split(' ')
             if 'src' in el]
        ).replace('src="', '')
        img_source = requests.get(img_url, stream=True)
        # The directory where data will be saved:
        filepath = Path.cwd() / 'CodeWars'
        # Make the CodeWars directory if it does not exist.
        filepath.mkdir(exist_ok=True)
        # BUG FIX: the pathlib refactor used filepath.with_suffix('.jpg'),
        # which writes to ./CodeWars.jpg and drops the username entirely.
        # Restore the original behavior: save <username>.jpg INSIDE the
        # CodeWars directory.
        with (filepath / f'{username}.jpg').open('wb') as img_obj:
            # Save user's avatar/profile pic:
            img_source.raw.decode_content = True
            shutil.copyfileobj(img_source.raw, img_obj)
            print('Profile pic has been downloaded')
        with (filepath / 'codewars_stats.txt').open('w', encoding='utf-8') as file_obj:
            # Save user's Codewars stats, one per line:
            for item in important_values:
                file_obj.write(item + '\n')
            print('CodewarsStats have been successfully downloaded')
        output += '\n\t'.join(important_values)
        return output
    else:
        return 'Something went wrong, enter a valid Codewars username.'