Code for scraping https://leagueoflegends.fandom.com/.

allChampions_scrapper.py gets the list of all available champions.
goldBounty_scrapper.py gets the gold bounty information associated with kills.
patchInfo_scrapper.py gets the URLs holding each patch's relevant information.
patchInfoDetail_scrapper.py (not fully developed) extracts the data behind each individual patch URL.
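
The scripts are independent except that patchInfoDetail_scrapper.py reads patch_info.csv and all_champions.csv, so allChampions_scrapper.py and patchInfo_scrapper.py have to run first. A minimal run-order sketch (assuming the four scripts and chromedriver.exe sit in the working directory):

import subprocess
import sys

# Run the scrapers in dependency order; the last one needs the CSVs written by the first and third
for script in ["allChampions_scrapper.py", "goldBounty_scrapper.py",
               "patchInfo_scrapper.py", "patchInfoDetail_scrapper.py"]:
    subprocess.run([sys.executable, script], check=True)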
# allChampions_scrapper.py
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 11 22:00:02 2020
@author: Alberto
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

save_locations = ['']  # put other paths if necessary

driver = webdriver.Chrome("chromedriver.exe")
driver.get("https://leagueoflegends.fandom.com/wiki/List_of_champions")

champion_names = []
content = driver.page_source
soup = BeautifulSoup(content, 'html.parser')
table = soup.find('table', {'class': 'wikitable sortable jquery-tablesorter'})
table_rows = table.findAll('td', {'style': 'text-align:left;'})
for i in table_rows:
    # Each left-aligned cell stores the champion name in its data-sort-value attribute
    champion_names.append(i['data-sort-value'])
driver.close()

all_champions = pd.DataFrame({'Champion Name': champion_names})
for path in save_locations:
    all_champions.to_csv(path + 'all_champions.csv', index=False, sep=',')
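
The jquery-tablesorter class in the selector is added by the wiki's JavaScript at render time, which is presumably why this page is fetched through Selenium rather than a plain HTTP request. If the browser window is a nuisance, Chrome can be started headless (a sketch, assuming a Selenium 3.x-era API matching the calls above):

from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('--headless')  # render the page without opening a window
driver = webdriver.Chrome("chromedriver.exe", options=options)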
# goldBounty_scrapper.py
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 11 13:07:28 2020
@author: Alberto
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

save_locations = ['']  # put other paths if necessary

driver = webdriver.Chrome("chromedriver.exe")
driver.get("https://leagueoflegends.fandom.com/wiki/Kill")


def formatString(string, input_type):
    # Strip newlines and non-breaking spaces, then keep the part of the cell we need
    string = string.replace('\n', '')
    string = string.replace('\xa0', '')
    if len(string) == 0:
        string = np.nan  # empty cell
    elif len(string) > 1:
        if input_type == 0:
            # Tier column: keep the second character and prepend an ASCII minus
            string = '-' + string[1]
        elif input_type == 1 or input_type == 2:
            # Streak columns: keep the leading digit
            string = string[0]
    return string


tier = []
consec_kills = []
consec_deaths = []
kill_bounty = []
assist_bounty = []

content = driver.page_source
soup = BeautifulSoup(content, 'html.parser')
table = soup.find('table', {'class': 'article-table'})
table_rows = table.findAll('tr')

# Column position decides which list a cell belongs to
for i in table_rows:
    for idx, j in enumerate(i.findAll('td')):
        if idx == 0:
            got = formatString(j.get_text(), 0)
            tier.append(float(got))
        elif idx == 1:
            got = formatString(j.get_text(), 1)
            consec_kills.append(float(got))
        elif idx == 2:
            got = formatString(j.get_text(), 2)
            consec_deaths.append(float(got))
        elif idx == 3:
            got = j.find('span', {'style': 'white-space:normal;'}).get_text()
            kill_bounty.append(float(got))
        elif idx == 4:
            got = j.find('span', {'style': 'white-space:normal;'}).get_text()
            assist_bounty.append(float(got))
driver.close()

gold_info = pd.DataFrame({'Tier': tier, 'Consecutive Kills': consec_kills,
                          'Consecutive Deaths': consec_deaths, 'Kill Bounty': kill_bounty,
                          'Assist Bounty': assist_bounty})
for path in save_locations:
    gold_info.to_csv(path + 'gold_info.csv', index=False, sep=',')
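
Unlike the champion table, the bounty table's article-table class appears to be part of the server-rendered HTML, so the same data could plausibly be fetched without a browser at all. A sketch assuming the requests package is available and the page layout is unchanged:

import requests
from bs4 import BeautifulSoup

# Fetch the static HTML directly; no chromedriver needed
html = requests.get("https://leagueoflegends.fandom.com/wiki/Kill").text
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table', {'class': 'article-table'})
print('rows:', len(table.findAll('tr')) if table is not None else 'table not found')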
# patchInfoDetail_scrapper.py (not fully developed)
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 11 21:04:28 2020
@author: Alberto
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import re

month_dict = {'January': '1',
              'February': '2',
              'March': '3',
              'April': '4',
              'May': '5',
              'June': '6',
              'July': '7',
              'August': '8',
              'September': '9',
              'October': '10',
              'November': '11',
              'December': '12'}


def formatString(string):
    # Strip newlines and non-breaking spaces left over from the wiki markup
    string = string.replace('\n', '')
    string = string.replace('\xa0', '')
    return string


def returnDate(date):
    # Turn tokens like ['April', '11,', '2020'] into 'D-M-YYYY'
    month = month_dict[formatString(date[0])]
    year = date[2]
    day = re.search('[0-9]*', date[1]).group(0)
    return day + '-' + month + '-' + year


patch_info = pd.read_csv('patch_info.csv')         # produced by patchInfo_scrapper.py
champion_names = pd.read_csv('all_champions.csv')  # produced by allChampions_scrapper.py
save_locations = ['', 'C:\\Users\\Alberto\\Desktop\\UNED\\2O CUATRIMESTRE\\VD\\data\\scrapped\\']  # put other paths if necessary

patch_id = []
champion = []
patch_date = []
change_description = []  # not filled in yet; see the sketch after this script

driver = webdriver.Chrome("chromedriver.exe")
for i in range(1):  # limited to the first patch while developing; use range(len(patch_info)) for a full run
    url_check = patch_info.loc[i, 'Info URL']
    driver.get(url_check)
    content = driver.page_source
    soup = BeautifulSoup(content, 'html.parser')
    patch_id.append(soup.find('h1', {'class': 'page-header__title'}).get_text())
    date_info = soup.find('td', {'class': 'pi-horizontal-group-item pi-data-value pi-font pi-border-color pi-item-spacing'}).get_text().split(' ')
    patch_date.append(returnDate(date_info))
    champion_info = soup.findAll('dl')
    first_added = False  # There may be a more elegant way to do this
    for idx, j in enumerate(champion_info):
        champion_name = j.find('span', {'style': 'white-space:normal;'})
        if champion_name is not None:
            champion_name = champion_name.get_text()
            if champion_name in champion_names['Champion Name'].tolist():
                # The <ul> following the champion's <dl> holds the change bullets;
                # turning it into text is the part that is still to do
                changes = j.find_next_sibling('ul')
                print(changes)
                print("____________________")
                if first_added:
                    # Repeat the patch id and date so all lists stay the same length
                    patch_id.append(patch_id[-1])
                    patch_date.append(patch_date[-1])
                champion.append(champion_name)
                first_added = True
driver.close()

patchInfo_detail = pd.DataFrame({'ID': patch_id, 'Champion': champion, 'Date': patch_date})
for path in save_locations:
    patchInfo_detail.to_csv(path + 'patchInfo_detail.csv', index=False, sep=',')
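
To finish the extraction, the <ul> that find_next_sibling locates could be flattened into one description string per champion. A hypothetical helper (summarize_changes is not part of the original code and assumes the change bullets live in that sibling list):

def summarize_changes(ul):
    # Collapse every bullet of a champion's change list into one '; '-separated string
    if ul is None:
        return None
    return '; '.join(li.get_text(' ', strip=True) for li in ul.find_all('li'))

# Inside the loop, the two print calls would then become:
# change_description.append(summarize_changes(changes))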
# patchInfo_scrapper.py
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 11 17:48:01 2020
@author: Alberto
"""
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

save_locations = ['']  # put other paths if necessary

driver = webdriver.Chrome("chromedriver.exe")
driver.get("https://leagueoflegends.fandom.com/wiki/Patch")

base_patch_url = 'https://leagueoflegends.fandom.com'  # the table's hrefs are site-relative
lookup_data = {'Season Five': 2015, 'Season Six': 2016, 'Season Seven': 2017,
               'Season Eight': 2018}

season_year = []
info_url = []
patch_id = []

content = driver.page_source
soup = BeautifulSoup(content, 'html.parser')
table = soup.find('div', {'class': 'va-collapsible-content mw-collapsible-content'})
table_rows = table.findAll('tr')

for i in table_rows:
    match = False
    patch_id_aux = None
    # The first header cell names the season, the second holds a patch id pattern with an 'x' placeholder
    for idx, j in enumerate(i.findAll('th')):
        if idx == 0:
            got = j.find('a')
            if got is not None:
                got = got.get_text()
                if got in lookup_data:
                    match = True
                    season_year.append(lookup_data[got])
        elif idx == 1 and match:
            got = j.get_text()
            got = got.replace('\n', '')
            got = got.replace(' ', '')
            patch_id_aux = got
    if match:
        # Every list item in the row's data cell links to one patch page
        patches = i.find('td')
        for idx, j in enumerate(patches.findAll('li')):
            got = j.find('a', href=True)
            patch_rest_url = got['href']
            patch_rest_id = got.get_text()
            patch_id.append(patch_id_aux.replace('x', patch_rest_id))
            info_url.append(base_patch_url + patch_rest_url)
            if idx > 0:
                # The season year was appended once per row; repeat it for the extra patches
                season_year.append(season_year[-1])
driver.close()

patch_info = pd.DataFrame({'Season Year': season_year, 'Info URL': info_url,
                           'ID': patch_id})
for path in save_locations:
    patch_info.to_csv(path + 'patch_info.csv', index=False, sep=',')
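
patch_info.csv is the file patchInfoDetail_scrapper.py iterates over, so a quick sanity check of the output is worthwhile before running the detail scraper (the column names are the ones written above):

import pandas as pd

patch_info = pd.read_csv('patch_info.csv')
print(patch_info.shape)  # one row per patch
print(patch_info[['ID', 'Season Year', 'Info URL']].head())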