LOL 英雄、皮肤、符文、装备及比赛数据爬取

爬取官网装备数据

网址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pymysql
import requests
import requests
from lxml import etree

# Scrape the official equipment (item) list and store it in the `item` table.

# Connect to the local MySQL database.
# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration.
conn = pymysql.connect(host="localhost", port=3306, user="root", password="123456", database="lol")
# JSON endpoint discovered by inspecting the site's network traffic.
url = "https://game.gtimg.cn/images/lol/act/img/js/items/items.js"
# Send a browser-like User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

try:
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() reports HTTP errors before JSON parsing is tried.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    resp.encoding = resp.apparent_encoding
    # List of item objects (the original author noted indices 0 ~ 480).
    items = resp.json()["items"]

    sql = "insert into item (id, name, price, effect) values (%s, %s, %s, %s)"
    with conn.cursor() as cursor:
        # Row ids are assigned from the 1-based position in the list.
        for item_id, item in enumerate(items, start=1):
            cursor.execute(
                sql,
                (str(item_id), item["name"], item["price"], item["description"]),
            )
    # Commit once so the table is filled completely or not at all.
    conn.commit()
finally:
    # Release the connection even when the request or an insert fails.
    conn.close()

爬取符文数据

网址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pymysql
import requests

import requests
from lxml import etree

# Scrape the rune data and store every rune in the `rune` table.

# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration.
conn = pymysql.connect(host="localhost", port=3306, user="root", password="123456", database="lol")
url = "https://lol.qq.com/act/a20170926preseason/data/cn/runes.json"
# Send a browser-like User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

try:
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() reports HTTP errors before JSON parsing is tried.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    resp.encoding = resp.apparent_encoding
    # Rune data: a list of styles, each with a list of slots holding runes.
    styles = resp.json()["styles"]

    sql = "insert into rune (name, is_main, effect, type) values (%s, %s, %s, %s)"
    with conn.cursor() as cursor:
        for style in styles:
            typ = style["name"]
            for slot_index, slot in enumerate(style["slots"]):
                # Runes in the first slot of a style are flagged as main ("1");
                # runes in every other slot get "0".
                is_main = "1" if slot_index == 0 else "0"
                for rune in slot["runes"]:
                    cursor.execute(
                        sql,
                        (rune["name"], is_main, rune["longDescription"], typ),
                    )
    # Commit once so the table is filled completely or not at all.
    conn.commit()
finally:
    # Release the connection even when the request or an insert fails.
    conn.close()

爬取英雄数据

网址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pymysql
import requests

import requests
from lxml import etree

# Scrape the hero list and store it in the `hero` table.

# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration.
conn = pymysql.connect(host="localhost", port=3306, user="root", password="123456", database="lol")
url = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
# Send a browser-like User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

try:
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() reports HTTP errors before JSON parsing is tried.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    resp.encoding = resp.apparent_encoding
    # List of hero objects (the original author noted indices 0 ~ 166).
    heros = resp.json()["hero"]

    sql = "insert into hero (id, name, price, role) values (%s, %s, %s, %s)"
    with conn.cursor() as cursor:
        # Row ids are assigned from the 1-based position in the list.
        for hero_id, hero in enumerate(heros, start=1):
            # Only the first entry of the hero's role list is stored.
            cursor.execute(
                sql,
                (str(hero_id), hero["name"], hero["goldPrice"], hero["roles"][0]),
            )
    # Commit once so the table is filled completely or not at all.
    conn.commit()
finally:
    # Release the connection even when the request or an insert fails.
    conn.close()

分页爬取皮肤

网址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pymysql
import requests
import requests
import time
from lxml import etree

# Connection to the local MySQL database `lol`.
# NOTE(review): the password is hard-coded in source — consider moving it to
# configuration.
conn = pymysql.connect(host="localhost", port=3306, user="root", password="qaz781212", database="lol")
# URL template for the skin listing pages; `%d` takes an integer (presumably
# the page number — confirm against the loop that formats it).
furl = "https://lol.52pk.com/pifu/hero/hero_%d.shtml"
# Browser-like User-Agent header.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Hero titles that the skin site spells differently from the `hero` table,
# mapped to the spelling the lookup by name expects.
_HERO_NAME_FIXES = {
    "铁铠幽冥": "铁铠冥魂",
    "战争之王": "不屈之枪",
    "炼金术师": "炼金术士",
    "雷霆咆哮": "不灭狂雷",
    "蒙多医生": "祖安狂人",
    "暗夜射手": "暗夜猎手",
    "圣毅之锤": "圣锤之毅",
    "死亡歌颂者": "死亡颂唱者",
    "末日使者": "远古恐惧",
}


def chk(name):
    """Return the canonical hero title for *name*.

    Titles listed in ``_HERO_NAME_FIXES`` are replaced by their corrected
    spelling; any other title is returned unchanged.
    """
    return _HERO_NAME_FIXES.get(name, name)


# Scrape every skin listing page, visit each skin's detail page and store the
# skins that can be bought with 点券 in the `skin` table.

# Running skin id. It advances for every detail link seen, including links
# that end up being skipped, so the stored ids may contain gaps.
idx = 0
sql = """insert into skin (id, name, price, hero_id) values (%s, %s, %s, (select id from hero where hero.name = %s))"""

try:
    with conn.cursor() as cursor:
        # The listing has 43 pages (1 ~ 43), each with several skins.
        for page in range(1, 44):
            # A timeout keeps the scrape from hanging on a stalled connection.
            list_resp = requests.get(furl % page, headers=headers, timeout=10)
            list_resp.raise_for_status()
            list_resp.encoding = list_resp.apparent_encoding
            # Extract the detail-page links and the hero titles with XPath.
            list_tree = etree.HTML(list_resp.text)
            urls = list_tree.xpath('//li[@class="boxShadow"]/a/@href')
            names = list_tree.xpath('//li[@class="boxShadow"]/a/div[@class="pifuIntro"]/strong/text()')

            for pos, detail_url in enumerate(urls):
                idx += 1
                # Without a matching hero title the skin cannot be attributed
                # to a hero, so skip it before spending a request on it.
                if pos >= len(names):
                    continue
                # Reconcile title spellings that differ from the hero table.
                name = chk(names[pos])

                detail_resp = requests.get(detail_url, headers=headers, timeout=10)
                # Throttle after every detail request, including skins that
                # are skipped below.
                time.sleep(0.1)
                detail_resp.encoding = detail_resp.apparent_encoding
                detail_tree = etree.HTML(detail_resp.text)

                skin_names = detail_tree.xpath('//h1[@class="pifuName"]/text()')
                prices = detail_tree.xpath('//div[@class="price"]/p[1]/span/text()')
                # Skip pages that lack a skin name or a price.
                if not skin_names or not prices:
                    continue
                price = prices[0]
                # Only skins priced in 点券 are recorded; the site sometimes
                # spells the unit as 点卷.
                if not price.endswith(("点券", "点卷")):
                    continue

                # price[:-2] drops the two-character currency suffix.
                cursor.execute(sql, (str(idx), skin_names[0] + " " + name, price[:-2], name))
                # Commit per skin so progress survives an interrupted scrape.
                conn.commit()
finally:
    # Release the connection even when a request or an insert fails.
    conn.close()

爬取 LOL 赛事官网比赛数据

网址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import pymysql
import requests
import requests
import time
from lxml import etree

# Connection to the local MySQL database `lol`.
# NOTE(review): the password and the Authorization token below are hard-coded
# in source — consider moving them to configuration.
conn = pymysql.connect(host="localhost", port=3306, user="root", password="qaz781212", database="lol")
# URL template for the match-detail API; `%d` takes the integer matchId.
furl = "https://open.tjstats.com/match-auth-app/open/v1/compound/matchDetail?matchId=%d"
# Per the request headers observed in the browser, the Authorization
# credential must be sent, otherwise access is refused.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
'Authorization': '7935be4c41d8760a28c05581a7b1f570'
}

# Integer counters, all starting at 0.
# NOTE(review): presumably running primary keys for the inserted rows —
# confirm against the code that increments them.
match_id = 0
team_id = 0
summoner_id = 0
rune_list_id = 0
item_list_id = 0
#player_name_set = set()

# Scrape match details from the esports API and store each game with its
# teams, players ("summoners"), items and runes.

# Rune names returned by the match API that are looked up under a different
# name in the `rune` table.
_RUNE_NAME_ALIASES = {
    "吸收生命力": "过量治疗",
    "三重补药": "完美时机",
}


def _lookup_id(cursor, query, name):
    """Return the first column of the first row matched by *query*, or None.

    *query* must contain exactly one ``%s`` placeholder, which is bound to
    *name*.
    """
    cursor.execute(query, (name,))
    row = cursor.fetchone()
    return None if row is None else row[0]


def _lookup_id_or_default(cursor, query, name, default=1):
    """Like ``_lookup_id`` but return *default* when no row matches."""
    found = _lookup_id(cursor, query, name)
    return default if found is None else found


def _get_or_create_player_id(cursor, player_name):
    """Return the id of the player named *player_name*, inserting it if absent."""
    query = "select id from player where name = %s"
    player_id = _lookup_id(cursor, query, player_name)
    if player_id is None:
        cursor.execute("insert into player(name) values(%s)", (player_name,))
        # The new row is visible to this connection before it is committed.
        player_id = _lookup_id(cursor, query, player_name)
    return player_id


try:
    with conn.cursor() as cursor:
        # Only matches related to the 2024 MSI are scraped.
        # NOTE(review): the original comment gives the id range as
        # 11038~11062, but range() stops at 11061 — confirm whether 11062
        # should be included.
        for api_match_id in range(11038, 11062):
            # A timeout keeps the scrape from hanging on a stalled connection,
            # and raise_for_status() reports HTTP errors (e.g. a rejected
            # Authorization header) before JSON parsing is tried.
            resp = requests.get(furl % api_match_id, headers=headers, timeout=10)
            resp.raise_for_status()
            resp.encoding = resp.apparent_encoding
            match_infos = resp.json()["data"]["matchInfos"]

            # Each entry of matchInfos is stored as one row of `match`.
            for match_info in match_infos:
                match_id += 1
                win_team_id = match_info["matchWin"]

                # is_rank is always stored as False for these rows.
                sql = "insert into `match` (id, is_rank, start_time, duration) values (%s, %s, %s, %s)"
                cursor.execute(sql, (match_id, False, match_info["matchStartTime"], match_info["gameTime"]))

                for team_info in match_info["teamInfos"]:
                    team_id += 1
                    is_win = team_info["teamId"] == win_team_id

                    sql = "insert into team (id, match_id, is_win, economy, dragons, kills) values (%s, %s, %s, %s, %s, %s)"
                    cursor.execute(
                        sql,
                        (team_id, match_id, is_win, team_info["golds"], team_info["dragonAmount"], team_info["kills"]),
                    )

                    for player_info in team_info["playerInfos"]:
                        summoner_id += 1

                        battle_detail = player_info["battleDetail"]
                        assist = battle_detail["assist"]
                        death = battle_detail["death"]
                        kill = battle_detail["kills"]
                        player_gold = player_info["otherDetail"]["golds"]
                        last_hit = player_info["minionKilled"]
                        position = player_info["playerLocation"]

                        # Resolve the hero; fail with a clear message instead
                        # of an opaque TypeError when it is not in the table.
                        hero_name = player_info["heroTitle"]
                        hero_id = _lookup_id(cursor, "select id from hero where name = %s", hero_name)
                        if hero_id is None:
                            raise LookupError("hero %r not found in table `hero`" % (hero_name,))

                        # Resolve the player, creating the row on first sight.
                        player_id = _get_or_create_player_id(cursor, player_info["playerName"])

                        sql = "insert into summoner (id, assist, death, economy, last_hit, `kill`, position, hero_id, player_id, team_id) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
                        cursor.execute(
                            sql,
                            (str(summoner_id), assist, death, player_gold, last_hit, kill, position, hero_id, player_id, team_id),
                        )

                        # One item_list row per item; unknown item names fall
                        # back to item id 1.
                        for item in player_info["items"]:
                            item_id = _lookup_id_or_default(
                                cursor, "select id from item where name = %s", item["itemName"]
                            )
                            item_list_id += 1
                            sql = "insert into item_list (id, item_id, summoner_id) values (%s, %s, %s)"
                            cursor.execute(sql, (str(item_list_id), str(item_id), str(summoner_id)))

                        # One rune_list row per rune; unknown rune names fall
                        # back to rune id 1.
                        for rune in player_info["perkRunes"]:
                            rune_name = _RUNE_NAME_ALIASES.get(rune["runeName"], rune["runeName"])
                            rune_id = _lookup_id_or_default(
                                cursor, "select id from rune where name = %s", rune_name
                            )
                            rune_list_id += 1
                            sql = "insert into rune_list (id, rune_id, summoner_id) values (%s, %s, %s)"
                            cursor.execute(sql, (str(rune_list_id), str(rune_id), str(summoner_id)))

                # Commit once per game so a failure never leaves a game stored
                # with only part of its teams or players.
                conn.commit()
finally:
    # Release the connection even when a request or an insert fails.
    conn.close()