网易云音乐歌单播放次数怎么计算（网易音乐统计歌单热度）

网易云音乐歌单播放次数怎么计算（网易音乐统计歌单热度）(1)

from selenium import webdriver

import time

import MySQLdb

from bs4 import BeautifulSoup

import re

from pyecharts import Bar

from pyecharts import WordCloud

from pyecharts import Page

from wordcloud import WordCloud,ImageColorGenerator

import matplotlib.pyplot as plt

import jieba

import sys

# Python 2-only workaround: force the interpreter-wide default string encoding
# to UTF-8. In Python 2 sys.setdefaultencoding is removed at startup, and
# reload(sys) makes it available again, so these two calls must stay in this
# order. Neither name exists in this form on Python 3.
reload(sys)

sys.setdefaultencoding('utf8')

# Disabled code: the bare string literal below holds a MySQL connection setup
# that is never executed (conn / cur are not defined anywhere else in view).
# NOTE(review): the disabled snippet embeds database credentials in source —
# confirm they are not live before publishing this file.
'''

#连接数据库

conn=MySQLdb.connect("localhost","root","1204","wymusicdata" ,charset="utf8")

cur=conn.cursor()

print "连接数据库成功"

'''

# Crawl every page of the NetEase Cloud Music playlist index and collect, for
# each playlist, its play count, title and author into three parallel lists
# (listenNums, songLists, authors).
from selenium.common.exceptions import NoSuchElementException

x = webdriver.Chrome()  # the call parentheses are required to create the driver

page = 0         # value of the `offset` URL parameter; advanced by 35 per page
listenNums = []  # play counts as digit strings ("万" expanded to "0000")
songLists = []   # playlist titles
authors = []     # playlist creators

try:
    while True:
        x.get('http://music.163.com/discover/playlist/?offset=%s' % page)

        # The playlist grid is rendered inside an iframe; elements in it cannot
        # be located until the driver has switched into that frame.
        x.switch_to.frame(x.find_element_by_name("contentFrame"))
        soup = BeautifulSoup(x.page_source, "html.parser")

        # Container whose <li> children each describe one playlist.
        cm = soup.find(class_="m-cvrlst f-cb")
        if cm is None:
            # No playlist grid on this page: stop instead of crashing so the
            # data gathered so far can still be charted.
            break

        for content in cm.find_all('li'):
            # Counts are shown like "12万"; expand the unit to plain digits
            # ("120000") before extracting the number.
            wan = str(content).replace('万', "0000")
            listenNum = re.findall(r'<span class="nb">(.*?)</span>', wan)
            titleTag = content.find(class_="tit f-thide s-fc0")
            authorTag = content.find(class_="nm nm-icn f-thide s-fc3")
            if not listenNum or titleTag is None or authorTag is None:
                # Skip incomplete entries so the three lists stay aligned.
                continue
            listenNums.append(listenNum[0])
            songLists.append(titleTag.text)
            authors.append(authorTag.text)

        # On the last page the "next page" control carries the class
        # "js-disabled"; finding it means the crawl is complete.
        # NOTE(review): the check is skipped for page 0, presumably because the
        # "previous page" control carries the same class there — confirm.
        if page != 0:
            try:
                x.find_element_by_class_name("js-disabled")
            except NoSuchElementException:
                print("已成功爬取第%s页" % (page // 35 + 1))
            else:
                print("全部歌单爬取成功")
                break

        # Pages are walked through the offset URL parameter rather than by
        # clicking the "next page" button (class "znxt"): the player bar at the
        # bottom of the site covers that button and makes it unclickable.
        page += 35
        time.sleep(2)  # pause between page loads
finally:
    # Always close the browser, even if the crawl aborts with an error.
    x.quit()

#--------------------------------------------

# Word-frequency statistics over the playlist titles.
# Produces:
#   ciyun     - all segmented words joined with "," (the text later passed to
#               the word cloud's generate())
#   ciyunList - the same words as a list
#   key/value - parallel lists: each distinct word and how often it occurs

# One or more consecutive CJK ideographs; compiled once, not once per title.
pattern = re.compile(u"[\u4e00-\u9fa5]+")

ciyunList = []
for words in songLists:
    # Keep only the Chinese runs of the title, then segment each with jieba.
    for r in pattern.findall(words):
        ciyunList.extend(jieba.cut(r))

ciyun = ",".join(ciyunList)

# Named wordCounts (not `dict`) so the builtin dict stays usable afterwards.
wordCounts = {}
for i in ciyunList:
    wordCounts[i] = wordCounts.get(i, 0) + 1

# keys() and values() of an unmodified dict are returned in matching order.
key = list(wordCounts.keys())
value = list(wordCounts.values())

# Bar chart of play counts per playlist, rendered with pyecharts.
# Combining the charts on a pyecharts Page was tried and is currently disabled,
# so the bar chart is rendered on its own; render() is called without a path,
# leaving the output location to the library default.
bar = Bar("网易Music", "网易云音乐歌单热度")
barSeries = ("歌单热度", songLists, listenNums)
bar.add(*barSeries, mark_point=["max", "min"])
bar.render()

# Disabled code kept as an inert string literal: a pyecharts word cloud built
# from key/value and added to the Page.
'''

wordcloud=WordCloud("歌单名词云",width=600,height=800)

wordcloud.add("",key,value,word_size_range=[20,100],shape="cardioid")

page.add(wordcloud)

#page.render()

'''

# Word cloud of the segmented playlist-title words, shaped by a mask image and
# written to a PNG file.
# WordCloud here is the `wordcloud` package class: its import comes after the
# pyecharts import of the same name and therefore rebinds it.
bgImage = plt.imread('xin.jpg')

# Options are collected in a dict literal and unpacked into the constructor.
wcOptions = {
    'background_color': 'black',
    'mask': bgImage,
    'font_path': 'simkai.ttf',
    'max_font_size': 50,
    'random_state': 30,
}
wc = WordCloud(**wcOptions)
wc.generate(ciyun)

# NOTE(review): imshow draws onto the current figure, but nothing in view shows
# or saves that figure — confirm whether plt.show() was intended.
plt.imshow(wc)
wc.to_file("WYsongLciyun.png")

网易云音乐歌单播放次数怎么计算（网易音乐统计歌单热度）

最新推荐

热门推荐