1 В избранное 0 Ответвления 0

OSCHINA-MIRROR/hongwing-WEIBO_DataAnalysis

В этом репозитории не указан файл с открытой лицензией (LICENSE). При использовании обратитесь к конкретному описанию проекта и его зависимостям в коде.
Клонировать/Скачать
Req_Info.py 4 КБ
Копировать Редактировать Web IDE Исходные данные Просмотреть построчно История
Henry Отправлено 28.11.2017 12:13 1d60e4e
# -*- coding:utf-8 -*-
'''
Request URL:https://m.weibo.cn/api/container/getIndex?uid=1350995007&luicode=10000011&lfid=100103type%3D3%26q%3D%E5%8F%A4%E5%8A%9B%E5%A8%9C%E6%89%8E&featurecode=20000320&type=uid&value=1350995007&containerid=1005051350995007
'''
'''
userinfo = {
'name': json_data['userInfo']['screen_name'], # user's screen name (nickname)
'description': json_data['userInfo']['description'], # 获取用户描述
'follow_count': json_data['userInfo']['follow_count'], # 获取关注数
'followers_count': json_data['userInfo']['followers_count'], # 获取粉丝数
'profile_image_url': json_data['userInfo']['profile_image_url'], # 获取头像
'verified_reason': json_data['userInfo']['verified_reason'], # 认证信息
'containerid': json_data['tabsInfo']['tabs'][1]['containerid'] # 此字段在获取博文中需要
}
'''
import requests
from time import sleep
import jieba.analyse
from html2text import html2text
from PIL import Image, ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
from Req_Person import *
# uid 是博主 id
def GetInfo(uid, timeout=10):
    """Fetch basic profile data for one account from the m.weibo.cn API.

    Args:
        uid: account id, sent as the ``value`` query parameter.
        timeout: seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        dict with the keys 'name', 'description', 'follow_count',
        'followers_count', 'profile_image_url', 'containerid' and 'gender'.
        'gender' is '男' for API value 'm', '女' for 'f' and '未知' for
        anything else.

    Raises:
        KeyError / IndexError: if the response JSON lacks an expected field.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    response = requests.get(
        'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'.format(uid),
        timeout=timeout)
    json_data = response.json()
    profile = json_data['userInfo']

    # API gender code -> display label; unknown codes fall back to '未知'.
    gender_labels = {'m': '男', 'f': '女'}

    return {
        'name': profile['screen_name'],
        'description': profile['description'],
        'follow_count': profile['follow_count'],
        'followers_count': profile['followers_count'],
        'profile_image_url': profile['profile_image_url'],
        # 'verified_reason' was disabled in the original code.
        # NOTE(review): presumably it is missing for unverified accounts --
        # confirm before re-enabling.
        # The container id of the second tab is what the post-listing request
        # needs as its ``containerid`` parameter.
        'containerid': json_data['tabsInfo']['tabs'][1]['containerid'],
        'gender': gender_labels.get(profile['gender'], '未知'),
    }
def GetPosts(uid, containerid, max_pages=5, max_posts=1000, timeout=10):
    """Collect the text of an account's posts from the m.weibo.cn API.

    Pages are requested one at a time, starting at page 0, with a short pause
    between requests. Crawling stops at the first page that has no cards,
    after ``max_pages`` pages, or as soon as ``max_posts`` posts have been
    collected (the original check only left the inner loop, so later pages
    were still fetched).

    Args:
        uid: account id, sent as the ``value`` query parameter.
        containerid: container id of the account's post feed.
        max_pages: maximum number of pages to request (default 5, the
            original hard-coded limit).
        max_posts: stop once this many posts have been collected.
        timeout: seconds to wait for each HTTP request.

    Returns:
        list with the 'text' field of every card that carries an 'mblog'
        entry, in the order received.

    Raises:
        KeyError: if a response has no 'cards' field.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    url_template = ("https://m.weibo.cn/api/container/getIndex"
                    "?type=uid&value={}&containerid={}&page={}")
    posts = []

    # NOTE(review): paging starts at 0 as in the original; confirm that the
    # endpoint does not serve the same content for page 0 and page 1.
    for page in range(max_pages):
        response = requests.get(
            url_template.format(uid, containerid, page), timeout=timeout)
        cards = response.json()['cards']
        if not cards:
            break

        for card in cards:
            # Progress trace: number of posts collected so far. Written as a
            # call so it parses on both Python 2 and Python 3.
            print(str(len(posts)) + '---->\n')
            # Not every card is a post; only those with 'mblog' carry text.
            if 'mblog' in card:
                posts.append(card['mblog']['text'])
                if len(posts) >= max_posts:
                    return posts

        sleep(0.5)  # pause between pages so the server is less likely to flag the crawler

    return posts
def BuildKeywords(posts):
    """Build a keyword -> weight mapping from a list of post bodies.

    Each post is converted with ``html2text``, the results are joined with
    newlines, and jieba's TextRank extractor is asked for up to 1000 keywords
    together with their weights.

    Args:
        posts: iterable of post bodies (strings).

    Returns:
        dict mapping each extracted keyword to its weight.
    """
    plain_texts = [html2text(post) for post in posts]
    corpus = '\n'.join(plain_texts)

    # TextRank extraction: (keyword, weight) entries, at most 1000 of them.
    ranked = jieba.analyse.textrank(corpus, topK=1000, withWeight=True)

    return {entry[0]: entry[1] for entry in ranked}
def CreateInfoImage(img, content, uid):
    """Render a word cloud shaped by a mask image and save it as a PNG.

    The picture is written to ``./static/pics/<uid>.png`` using the font at
    ``./font/ROUND.ttf``; both paths are relative to the working directory.

    Args:
        img: path of the image used as the cloud's mask.
        content: mapping of word -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        uid: account id; only used to name the output file.

    Returns:
        None.
    """
    # Copy the pixels into an array while the file is open, then release the
    # handle deterministically instead of leaving it to garbage collection.
    with Image.open(img) as image:
        graph = np.array(image)

    # The original also built ImageColorGenerator(graph), but only for a
    # commented-out matplotlib preview; that unused object is no longer made.
    wc = WordCloud(font_path='./font/ROUND.ttf',
                   background_color='white',
                   max_words=1000,
                   mask=graph)
    wc.generate_from_frequencies(content)
    wc.to_file('./static/pics/' + str(uid) + '.png')
def CreateWordCloudMap(name, img):
    """Produce the word-cloud picture for the account with screen name *name*.

    Steps: resolve the screen name to an account id, fetch the profile (for
    its container id), download the posts, extract weighted keywords and
    render them onto the mask image.

    Args:
        name: screen name of the account.
        img: path of the mask image handed to CreateInfoImage.
    """
    # GetUIDFromScreenName is not defined in this file; presumably it comes
    # from the star import of Req_Person.
    account_id = GetUIDFromScreenName(name)
    profile = GetInfo(account_id)
    post_texts = GetPosts(account_id, profile['containerid'])
    weights = BuildKeywords(post_texts)
    CreateInfoImage(img, weights, account_id)
if __name__ == '__main__':
    # Manual test hook: nothing runs by default. Uncomment the call below to
    # render a word cloud for a single account.
    # CreateWordCloudMap('柳岩', './bg/love.jpg')
    pass

Опубликовать ( 0 )

Вы можете оставить комментарий после Вход в систему

1
https://api.gitlife.ru/oschina-mirror/hongwing-WEIBO_DataAnalysis.git
git@api.gitlife.ru:oschina-mirror/hongwing-WEIBO_DataAnalysis.git
oschina-mirror
hongwing-WEIBO_DataAnalysis
hongwing-WEIBO_DataAnalysis
master