Web Scraping

Made file and path handling a little cleaner with pathlib.

  • from bs4 import BeautifulSoup
    import requests
    import shutil
    import os
    from pathlib import Path
    def get_codewars_stats(username):
        """Scrape and save the Codewars stats and avatar of the given username.

        Fetches the user's profile page, collects the text of its ``div.stat``
        elements (every one except the sixth), and stores the result in a
        ``CodeWars`` directory under the current working directory:
        ``<username>.jpg`` for the avatar and ``codewars_stats.txt`` for the
        stats, one value per line.

        Args:
            username: Codewars username whose profile page is requested.

        Returns:
            A printable summary of the collected stats, or an error message
            when the profile page does not answer with HTTP 200.
        """
        # NOTE(review): the listing passed the bare username to requests.get(),
        # which has no scheme and cannot be requested. The public profile URL
        # is assumed here -- confirm against the original script.
        source = requests.get(f'https://www.codewars.com/users/{username}')
        # Verify request status. Comparing against 200 (rather than 404)
        # catches the whole range of failed requests.
        if source.status_code != 200:
            return 'Something went wrong, enter a valid Codewars username.'

        soup = BeautifulSoup(source.text, 'html.parser')
        stat_info = soup.find_all('div', class_='stat')
        important_values = [info.text for info in stat_info[:5] + stat_info[6:]]

        # The directory where data will be saved; created if it does not exist.
        filepath = Path.cwd() / 'CodeWars'
        filepath.mkdir(exist_ok=True)

        # Get the URL of the user's avatar/profile pic from the first <img>
        # following the first <figure>. Reading the attribute directly avoids
        # the stray quote characters left behind by splitting the tag's text.
        figure = soup.find('figure')
        img_tag = figure.find_next('img') if figure is not None else None
        img_url = img_tag.get('src') if img_tag is not None else None
        if img_url:
            img_source = requests.get(img_url, stream=True)
            # Only write the file when the image request itself succeeded,
            # otherwise an error page would be saved as the picture.
            if img_source.status_code == 200:
                # Build the name by concatenation: with_suffix() would replace
                # anything after a dot in the username and, applied to the
                # directory path, would write 'CodeWars.jpg' next to it.
                with (filepath / f'{username}.jpg').open('wb') as img_obj:
                    # Let urllib3 undo gzip/deflate before copying raw bytes.
                    img_source.raw.decode_content = True
                    shutil.copyfileobj(img_source.raw, img_obj)
                print('Profile pic has been downloaded')

        with (filepath / 'codewars_stats.txt').open('w', encoding='utf-8') as file_obj:
            # Save user's Codewars stats, one value per line.
            file_obj.writelines(item + '\n' for item in important_values)
        print('CodewarsStats have been successfully downloaded')

        return f"{username}'s Codewars stats:\n" + '\n\t'.join(important_values)
    • from bs4 import BeautifulSoup
    • import requests
    • import shutil
    • import os
    • from pathlib import Path
    • def get_codewars_stats(username):
    • """Scrapes and retrieves the Codewars stats of the given username."""
    • output = f'{username}\'s Codewars stats:\n'
    • source = requests.get(f'{username}', stream=True)
    • # Verify request status. Using 404 would miss a wide range of other failed connections.
    • if source.status_code == 200:
    • soup = BeautifulSoup(source.text, 'html.parser')
    • stat_info = soup.findAll('div', class_='stat')
    • important_values = [info.text for info in stat_info[:5] + stat_info[6:]]
    • # Get url to users avatar/profile pic
    • img_url = ''.join([el for el in str(soup.findAll('figure')[0].findNext('img')).split(' ') if 'src' in el]).replace('src="', '')
    • # Get image_url requests:
    • img_source = requests.get(img_url, stream=True)
    • # The filepath where data will be saved:
    • filepath = os.path.join(os.getcwd(), 'CodeWars')
    • filepath = Path.cwd() / 'CodeWars'
    • # Make Codewars directory if it does not exist:
    • if not os.path.isdir(filepath):
    • os.mkdir(filepath)
    • if not filepath.is_dir():
    • filepath.mkdir()
    • with open(os.path.join(filepath, username + '.jpg'), 'wb') as img_obj:
    • with filepath.with_suffix('.jpg').open('wb') as img_obj:
    • # Save user's avatar/profile pic:
    • img_source.raw.decode_content = True
    • shutil.copyfileobj(img_source.raw, img_obj)
    • print('Profile pic has been downloaded')
    • with open(os.path.join(filepath, 'codewars_stats.txt'), 'w', encoding='utf-8') as file_obj:
    • with (filepath /'codewars_stats.txt').open('w', encoding='utf-8') as file_obj:
    • # Save user's Codewars stats:
    • for item in important_values:
    • file_obj.write(item + '\n')
    • print('CodewarsStats have been successfully downloaded')
    • output += '\n\t'.join([i for i in important_values])
    • return output
    • else:
    • return 'Something went wrong, enter a valid Codewars username.'