OSCHINA-MIRROR/xia_zhaoqiang-rbpr-recomender-systems

Rating_BPR.py · 28 KB
夏召强 · committed 09.09.2022 06:15 · d58b83b
#Ver1.0
#Zero @2012.5.2
#
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
from numpy import linalg as LA
import math
import random
import pickle
from decimal import *
import numpy as np
def read_file_without_scores(file_to_read, space_type='\t'):
    dict_items = dict()
    dict_not_items = dict()
    list_users = list()
    list_items = list()
    cont = 0
    fi = open(file_to_read, 'r')
    for line in fi:
        if line.strip():
            cont += 1
            inline = line.split(space_type)
            list_users.append(int(inline[0]))
            list_items.append(int(inline[1]))
            if int(inline[0]) in dict_items:
                dict_items[int(inline[0])] += [int(inline[1])]
            else:
                dict_items[int(inline[0])] = [int(inline[1])]
    fi.close()
    list_users = sorted(list(set(list_users)))  # set() removes duplicates, sorted() orders ascending
    list_items = sorted(list(set(list_items)))  # list_users holds all users in order, list_items all rated items in order
    for user in list_users:
        dict_not_items[user] = list(set(list_items) - set(dict_items[user]))  # dict_not_items holds each user's unrated items
    return dict_items, dict_not_items, list_users, list_items, cont, len(list_users), len(list_items)
__author__ = 'Arthur Fortes'
learn_rate = 0.01
learn_rate1= 0.01
regularization = 0.01
reg_u = 0.01
reg_i = 0.01
reg_j = 0.01
reg_bias = 0
getcontext().prec = 20
#number_int = 15000
number_int = 15000
def create_bias(bias_size):
    bias = np.random.uniform(0, 1, bias_size)
    return bias
def create_factors(num_users, num_items, factors):
    users_factors = np.random.uniform(0, 1, [num_users, factors])
    items_factors = np.random.uniform(0, 1, [num_items, factors])
    return users_factors, items_factors
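# --- Editor's illustrative sketch (not part of the original script) ---
# The factor model used throughout scores a (user, item) pair as a biased dot
# product, r_ui = b_i + p_u . q_i. The hypothetical helper below restates that
# computation with np.dot; the training code itself keeps the elementwise-sum form.
def _predict_pair_sketch(bias_items, u_factors, i_factors, u, i):
    """Hypothetical helper: return the BPR-MF score b_i + <p_u, q_i> for indices u, i."""
    return bias_items[i] + np.dot(u_factors[u], i_factors[i])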
class BPRMF(object):
    def __init__(self, file_to_train, file_to_write, space_type='\t', num_factors=10, num_interactions=30):
        self.file_to_train = file_to_train
        self.file_to_write = file_to_write
        self.num_factors = num_factors
        self.num_interactions = num_interactions
        dict_items, dict_not_items, list_users, list_items, num_int, num_users, num_items = read_file_without_scores(
            file_to_train, space_type)
        self.dict_items = dict_items
        self.dict_not_items = dict_not_items
        self.list_users = list_users
        self.list_items = list_items
        self.num_int = num_int
        self.num_users = num_users
        self.num_items = num_items
        self.bias_items = create_bias(num_items)
        self.u_factors, self.i_factors = create_factors(num_users, num_items, num_factors)
        print("Training data: " + str(num_users) + " users | " + str(num_items) + " items | " +
              str(num_int) + " interactions...")
        print("BPRMF num_factors=" + str(num_factors) + " | bias_reg=" + str(reg_bias) + " | reg_u=" + str(reg_u) +
              " | reg_i=" + str(reg_i) + " | reg_j=" + str(reg_j) + " | learn rate= " + str(learn_rate))
        self.train_bprmf()
        #self.predict_bprmf1()
        print('Trained')
        #self.predict_score()
    def sample_triple(self):
        user = random.choice(self.list_users)
        item = random.choice(self.dict_items[user])
        other_item = random.choice(self.dict_not_items[user])
        user_id = self.list_users.index(user)
        item_id = self.list_items.index(item)
        other_item_id = self.list_items.index(other_item)
        return user, item, other_item, user_id, item_id, other_item_id
    '''
    def run(self):
        self.train_bprmf()
        print('Trained')
        self.predict_bprmf()
    '''
    def train_bprmf(self):
        rmse_result = []
        for i in range(self.num_interactions):
            self.iterate_bprmf()
            #self.predict()
            #self.predict_score()
            if i % 2 == 0 and i >= 2000:
                print("i, eui=1, feature fusion, K1=0.9 TOP=1, bpr-max(pmf_i + pmf_j) program=bpr_svd_1 ml100k", i)
                print("ml100k i", i)
                self.predict_bprmf1(1)
                #self.predict_bprmf2(1)
                #self.predict_bprmf2(5)
                #self.predict_bprmf2(10)
            else:
                print("ml100k rating number 5 i=", i)
            #self.predict_score()
            #rmse_result.append(rmse)
            #print("step, rmse:", i, rmse_result)
    def iterate_bprmf(self):
        i = 0
        for _ in range(number_int):
            i += 1
            user, item, other_item, user_id, item_id, other_item_id = self.sample_triple()
            self.update_factors_bprmf(user, item, other_item, user_id, item_id, other_item_id)
        return self.u_factors, self.i_factors, self.bias_items
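    # Note on the update below (editor's reading of the code): for each sampled
    # triple (u, i, j) the gradient mixes two signals,
    #   * a pairwise BPR ranking term weighted by K1, driven by
    #       fun_exp = exp(-(r_ui - r_uj)) / (1 + exp(-(r_ui - r_uj))),
    #   * a rating-fitting term weighted by K2 = 1 - K1 that pushes the scaled
    #     sigmoid A * g(r_ui) towards the observed training rating V[u-1, i-1]
    #     and A * g(r_uj) towards the LFM-predicted rating P_rating[u-1, j-1],
    # plus L2 regularization controlled by reg_u / reg_i / reg_j / reg_bias.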
    def update_factors_bprmf(self, user, item, other_item, u, i, j):
        rui = self.bias_items[i] + sum(np.array(self.u_factors[u]).T * np.array(self.i_factors[i]))
        ruj = self.bias_items[j] + sum(np.array(self.u_factors[u]).T * np.array(self.i_factors[j]))
        uid = user - 1
        iid = item - 1
        eui = 1
        A = 5
        B = 0
        K1 = 0.9
        K2 = 1 - K1
        eui1 = V[uid, iid]
        ruj0 = P_rating[user - 1, other_item - 1]
        #eui = (2**eui - 1) / 2**5
        x_uij = eui * (rui - ruj)
        #print("x_uij:", x_uij)
        if x_uij < -700:
            fun_exp = 1
        else:
            temp = math.exp(-x_uij)
            fun_exp = float(temp) / float((1.0 + temp))
        g_ui = 1 / float((1.0 + math.exp(-rui)))
        delta_gui = math.exp(-rui) * g_ui * g_ui
        g_uj = 1 / float((1.0 + math.exp(-ruj)))
        delta_guj = math.exp(-ruj) * g_uj * g_uj
        update_bias_i = K1*fun_exp - reg_bias * self.bias_items[i] + K2*0.5*A*(eui1+B-A*g_ui)*delta_gui  #- 0.2*(rui-eui1)
        self.bias_items[i] += learn_rate * update_bias_i
        update_bias_j = -K1*fun_exp - reg_bias * self.bias_items[j] + K2*0.5*A*(ruj0+B-A*g_uj)*delta_guj
        self.bias_items[j] += learn_rate * update_bias_j
        for num in range(self.num_factors):
            w_uf = self.u_factors[u][num]
            h_if = self.i_factors[i][num]
            h_jf = self.i_factors[j][num]
            update_user = K1*(h_if - h_jf)*fun_exp - reg_u*w_uf + K2*0.5*A*(eui1+B-A*g_ui)*delta_gui*h_if + K2*0.5*A*(ruj0+B-A*g_uj)*delta_guj*h_jf  #- 0.2*(rui-eui1)*h_if
            self.u_factors[u][num] = w_uf + learn_rate * update_user
            #self.u_factors[u][num] += learn_rate1*(h_if - regularization * self.u_factors[u][num])
            #if self.u_factors[u][num] < 0:
            #    self.u_factors[u][num] = 0
            update_item_i = K1*w_uf*fun_exp - reg_i*h_if + K2*0.5*A*(eui1+B-A*g_ui)*delta_gui*w_uf  #- 0.2*(rui-eui1)*w_uf
            self.i_factors[i][num] = h_if + learn_rate * update_item_i
            #self.i_factors[i][num] += learn_rate1 * (w_uf - regularization * self.i_factors[i][num])
            update_item_j = -K1*w_uf*fun_exp - reg_j*h_jf + K2*0.5*A*(ruj0+B-A*g_uj)*delta_guj*w_uf
            self.i_factors[j][num] = h_jf + learn_rate * update_item_j
    def predict_bprmf(self):
        final_rank = list()
        for user in self.list_users:
            #print(user)
            score_items = list()
            u = self.list_users.index(user)
            for item in self.dict_not_items[user]:
                i = self.list_items.index(item)
                rui = self.bias_items[i] + sum([a*b for a, b in zip(self.u_factors[u], self.i_factors[i])])
                # rui = self.bias_items[i] + sum(np.array(self.u_factors[u]).T * np.array(self.i_factors[i]))
                score_items.append([i, rui])
            list_items = sorted(score_items, key=lambda x: -x[1])  # sort by the second element (the score), descending
            final_rank.append([user, list_items[:10]])
        print('Writing ranks...')
        with open(self.file_to_write, 'w') as infile_write:
            for user in final_rank:
                for item in user[1]:
                    infile_write.write(str(user[0]) + '\t' + str(item[0]) + '\t' + str(item[1]) + '\n')
    def predict(self):
        X = []
        Y = []
        bias_i = np.zeros(1682)
        for user in range(943):
            x = np.zeros(self.num_factors)
            if user + 1 in self.list_users:  # user ids in list_users are 1-based
                u = self.list_users.index(user + 1)
                x = self.u_factors[u]
            X.append(x)
        for item in range(1682):
            y = np.zeros(self.num_factors)
            if item + 1 in self.list_items:  # item ids in list_items are 1-based
                i = self.list_items.index(item + 1)
                y = self.i_factors[i]
                bias_i[item] = self.bias_items[i]
            Y.append(y)
        fi = open(modelSaveFile, 'wb')
        fi.truncate()
        pickle.dump(bias_i, fi, True)
        pickle.dump(X, fi, True)
        pickle.dump(Y, fi, True)
        fi.close()
        print("model generation over")
        '''
        fi = open(testDataFile, "r")
        RMSE = 0
        MAE = 0
        cnt = 0
        for line in fi:
            cnt += 1
            arr = line.split()
            uid = int(arr[0].strip()) - 1
            iid = int(arr[1].strip()) - 1
            score = int(arr[2].strip())
            prediction = self.bias_items[iid] + sum([a*b for a, b in zip(self.u_factors[uid], self.i_factors[iid])])
            eui = score - prediction
            #print("score, prediction, eui:", score, prediction, eui)
            MAE += abs(eui)
            RMSE += eui*eui
        MAE /= cnt
        RMSE = math.sqrt(RMSE/cnt)
        print("RMSE, MAE:", RMSE, MAE)
        return RMSE
        '''
    def predict_bprmf1(self, TOP):
        final_rank = list()
        pre_5 = []
        map_5 = []
        Recall_5 = []
        MRR = 0
        PRE = 0
        MAP = 0
        Recall = 0
        mrr = []
        Top_k = TOP
        '''
        fi = open(modelSaveFile, 'rb')
        bias_i = pickle.load(fi)
        pu = pickle.load(fi)
        qi = pickle.load(fi)
        fi.close()
        '''
        fi = open(modelSaveFile0, 'rb')
        qi = pickle.load(fi)
        pu = pickle.load(fi)
        fi.close()
        ka = 0
        #print("list_testusers", list_testusers)
        #print("self.list_users", self.list_users)
        for user in list_testusers:
            #print(user)
            item_test = []
            final_rank = list()
            final_rank_order = list()
            final_rank_item = list()
            final_test = list()
            final_item = []
            for j in range(iteNum):
                if N[user-1, j] > 0:
                    item_test.append([(j+1), N[user-1, j]])
            final_test = sorted(item_test, key=lambda x: -x[1])
            final_item = [x[0] for x in final_test[:]]
            #print("final_item", final_item)
            if len(final_item) == 0:
                ka += 1
                continue
            fflag = 0
            if user in self.list_users:
                fflag = 0
            else:
                fflag = 1
            if fflag == 1:
                continue
            score_items = list()
            u = self.list_users.index(user)
            for item in self.dict_not_items[user]:
                i = self.list_items.index(item)
                #rui = self.bias_items[i] + sum([a*b for a, b in zip(self.u_factors[u], self.i_factors[i])])
                rui = (self.bias_items[i] + sum(np.array(self.u_factors[u]).T * np.array(self.i_factors[i])))
                #rui2 = sum([a*b for a, b in zip(pu[user-1], qi[item-1])])
                #rui = 0.8*rui1 + 0.2*rui2
                #rui = self.bias_items[i] + sum(np.array(pu[user-1]+self.u_factors[u]).T * np.array(qi[item-1]+self.i_factors[i]))
                #rui = bias_i[i-1] + sum([a*b for a, b in zip(pu[user-1], qi[item-1])])
                score_items.append([item, rui])
            #if user < 2:
            #    print("score_items[:-1]", sorted(score_items, key=lambda s: s[1], reverse=True))
            #list_items = sorted(score_items, key=lambda x: -x[1])
            #print("list_items", list_items)
            #final_rank.append([user, list_items[:10]])
            #print("final_rank", final_rank)
            #final_rank.append([user, list_items[:]])
            final_rank = sorted(score_items, key=lambda x: -x[1])  # sort by score, descending
            final_rank_order = [x[0] for x in final_rank[:]]
            final_rank_item = [x[0] for x in final_rank[:Top_k]]
            #print("final_rank_item", final_rank_item)
            mrr_flag = 0
            for rank, ii in enumerate(final_rank_order):
                if ii in final_item:
                    mrr.append(1.0 / (rank + 1))
                    mrr_flag = 1
                    break
            if mrr_flag == 0:
                mrr.append(0)
            correct_num = 0
            for rank2, item2 in enumerate(final_rank_item):
                for cc in final_item:
                    if cc == item2:
                        correct_num = correct_num + 1
                        break
            pre_5.append(correct_num / Top_k)
            Recall_5.append(correct_num / len(final_item))
            pre5 = 0
            true_num = 0
            count_i = 0
            ap = []
            for rank1, item1 in enumerate(final_rank_order):
                count_i = count_i + 1
                for bb in final_item:
                    if bb == item1:
                        true_num += 1
                        ap.append(true_num / (rank1 + 1))
                        break
            #print("true_num, count_i", true_num, count_i)
            if true_num == 0:
                map_5.append(0)
            else:
                map_5.append(sum(ap) / len(final_item))
            #print("len(ap), true_num", len(ap), true_num)
        if len(pre_5) == 0:
            PRE = 0
        else:
            PRE = np.mean(pre_5)
        if sum(map_5) == 0:
            MAP = 0
        else:
            MAP = np.mean(map_5)
        #print("len(map_5)", map_5)
        if len(mrr) == 0:
            MRR = 0
        else:
            MRR = np.mean(mrr)
        if len(Recall_5) == 0:
            Recall = 0
        else:
            Recall = np.mean(Recall_5)
        F1 = 0
        if PRE + Recall == 0:
            F1 = 0
        else:
            F1 = 2 * Recall * PRE / (PRE + Recall)
        final_map.append(MAP)
        final_mrr.append(MRR)
        final_f1.append(F1)
        final_pre.append(PRE)
        final_recall.append(Recall)
        print('test Top_k={0:.0f}, pre= {1:.4f}, recall= {2:.4f}, MAP={3:.4f}, MRR={4:.4f}, F1={5:.4f}'.format(Top_k, PRE, Recall, MAP, MRR, F1))
        print('test max pre= {0:.4f}, recall= {1:.4f}, MAP={2:.4f}, MRR={3:.4f}, F1={4:.4f}'.format(max(final_pre), max(final_recall), max(final_map), max(final_mrr), max(final_f1)))
        return (MRR, PRE, MAP)
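    # Metrics computed above (editor's summary of the code, per test user and then averaged):
    #   precision@K = hits in the top-K recommendations / K
    #   recall@K    = hits in the top-K recommendations / number of relevant test items
    #   AP          = sum over hits of (running hit count / rank), divided by the number
    #                 of relevant test items; MAP is the mean of AP over users
    #   MRR         = mean of 1 / rank of the first hit in the full ranking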
    def predict_bprmf2(self, TOP):
        final_rank = list()
        pre_5 = []
        map_5 = []
        Recall_5 = []
        MRR = 0
        PRE = 0
        MAP = 0
        Recall = 0
        mrr = []
        Top_k = TOP
        '''
        fi = open(modelSaveFile, 'rb')
        bias_i = pickle.load(fi)
        pu = pickle.load(fi)
        qi = pickle.load(fi)
        fi.close()
        '''
        fi = open(modelSaveFile0, 'rb')
        qi = pickle.load(fi)
        pu = pickle.load(fi)
        fi.close()
        ka = 0
        for user in list_testusers:
            #print(user)
            item_test = []
            final_rank = list()
            final_rank_order = list()
            final_rank_item = list()
            final_test = list()
            final_item = []
            for j in range(iteNum):
                if N[user-1, j] > 0:
                    item_test.append([(j+1), N[user-1, j]])
            final_test = sorted(item_test, key=lambda x: -x[1])
            final_item = [x[0] for x in final_test[:]]
            #print("final_item", final_item)
            if len(final_item) == 0:
                ka += 1
                continue
            score_items = list()
            u = self.list_users.index(user)
            for item in self.dict_not_items[user]:
                i = self.list_items.index(item)
                #rui = self.bias_items[i] + sum([a*b for a, b in zip(self.u_factors[u], self.i_factors[i])])
                rui = (self.bias_items[i] + sum(np.array(self.u_factors[u]).T * np.array(self.i_factors[i])))
                #rui2 = sum([a*b for a, b in zip(pu[user-1], qi[item-1])])
                #rui = 0.8*rui1 + 0.2*rui2
                #rui = self.bias_items[i] + sum(np.array(pu[user-1]+self.u_factors[u]).T * np.array(qi[item-1]+self.i_factors[i]))
                #rui = bias_i[i-1] + sum([a*b for a, b in zip(pu[user-1], qi[item-1])])
                score_items.append([item, rui])
            #if user < 2:
            #    print("score_items[:-1]", sorted(score_items, key=lambda s: s[1], reverse=True))
            #list_items = sorted(score_items, key=lambda x: -x[1])
            #print("list_items", list_items)
            #final_rank.append([user, list_items[:10]])
            #print("final_rank", final_rank)
            #final_rank.append([user, list_items[:]])
            final_rank = sorted(score_items, key=lambda x: -x[1])  # sort by score, descending
            final_rank_order = [x[0] for x in final_rank[:]]
            final_rank_item = [x[0] for x in final_rank[:Top_k]]
            #print("final_rank_item", final_rank_item)
            correct_num = 0
            for rank2, item2 in enumerate(final_rank_item):
                for cc in final_item:
                    if cc == item2:
                        correct_num = correct_num + 1
                        break
            pre_5.append(correct_num / Top_k)
            Recall_5.append(correct_num / len(final_item))
            pre5 = 0
        if len(pre_5) == 0:
            PRE = 0
        else:
            PRE = np.mean(pre_5)
        if len(Recall_5) == 0:
            Recall = 0
        else:
            Recall = np.mean(Recall_5)
        F1 = 0
        if PRE + Recall != 0:  # guard against division by zero, as in predict_bprmf1
            F1 = 2 * Recall * PRE / (PRE + Recall)
        print('test Top_k={0:.0f}, pre= {1:.4f}, recall= {2:.4f}, F1={3:.4f}'.format(Top_k, PRE, Recall, F1))
        return (Top_k, PRE, Recall)
    def predict_score(self):
        bias_i = [0.0 for i in range(1682)]
        pu = np.zeros((943, self.num_factors))
        qi = np.zeros((1682, self.num_factors))
        fi = open(modelSaveFile, 'rb')
        bias_i = pickle.load(fi)
        pu = pickle.load(fi)
        qi = pickle.load(fi)
        fi.close()
        qi_lfm = np.zeros((1682, self.num_factors))
        pu_lfm = np.zeros((943, self.num_factors))
        fi = open(modelSaveFile0, 'rb')
        qi_lfm = pickle.load(fi)
        pu_lfm = pickle.load(fi)
        fi.close()
        # train
        Score_max = 0
        fi = open(trainDataFile, 'r')
        for line in fi:
            arr = line.split()
            uid = int(arr[0].strip()) - 1
            iid = int(arr[1].strip()) - 1
            pScore = bias_i[iid] + sum([a*b for a, b in zip(pu[uid], qi[iid])])
            if pScore > Score_max:
                Score_max = pScore
        fi.close()
        for step in range(1):
            '''
            cnt_train = 0
            rmse_train = 0
            MAE_train = 0
            fi = open(trainDataFile, 'r')
            for line in fi:
                cnt_train += 1
                arr = line.split()
                uid = int(arr[0].strip()) - 1
                iid = int(arr[1].strip()) - 1
                tScore = int(arr[2].strip())
                pScore_lfm = sum([a*b for a, b in zip(pu_lfm[uid], qi_lfm[iid])])
                pScore = bias_i[iid] + sum([a*b for a, b in zip(pu[uid], qi[iid])])
                #pScore = pScore*5/Score_max
                eui = tScore - pScore
                MAE_train += abs(eui)
                rmse_train += eui*eui
            fi.close()
            MAE_train /= cnt_train
            rmse_train = math.sqrt(rmse_train/cnt_train)
            print("step, MAE_train, rmse_train %d: %f: %f:" % (step, MAE_train, rmse_train))
            '''
            # predict
            fi = open(testDataFile, 'r')
            MAE = 0
            rmse = 0
            rmse_av = 0
            rmse_compare = 0
            cnt = 0
            for line in fi:
                cnt += 1
                arr = line.split()
                uid = int(arr[0].strip()) - 1
                iid = int(arr[1].strip()) - 1
                tScore = float(arr[2].strip())
                u = self.list_users.index(uid + 1)
                if iid + 1 in self.dict_not_items[uid + 1]:
                    i = self.list_items.index(iid + 1)
                    pScore_lfm = sum([a*b for a, b in zip(pu_lfm[uid], qi_lfm[iid])])
                    pScore = self.bias_items[i] + sum(np.array(self.u_factors[u]).T * np.array(self.i_factors[i]))
                    pScore = pScore * 5 / Score_max
                    if pScore < 1:
                        pScore = 1
                    if pScore_lfm > 5:
                        pScore_lfm = 5
                    eui = tScore - pScore
                    rmse += eui*eui
                    rmse_av += (tScore - pScore_lfm) * (tScore - pScore_lfm)
                    MAE += abs(eui)
                    #print("tScore, pScore, eui, %f: %f: %f:" % (tScore, pScore, eui))
            fi.close()
            MAE /= cnt
            rmse = math.sqrt(rmse/cnt)
            rmse_av = math.sqrt(rmse_av/cnt)
            print("MAE, rmse_test, rmse_av %f: %f: %f: " % (MAE, rmse, rmse_av))
            #return rmse
def LFM(testDataFile, trainDataFile, modelSaveFile0, useNum, iteNum):
    # get the configuration
    rmse_result = []
    factorNum = 10
    userNum = useNum
    itemNum = iteNum
    learnRate = 0.001
    regularization = 0.005
    # train model
    # pScore = pu * qi
    for factorNum in range(10, 15, 20):
        temp = math.sqrt(factorNum)  # temp is the square root of factorNum
        #qi = [[(random.uniform(1, 0.5*temp) / temp) for j in range(factorNum)] for i in range(itemNum)]
        #pu = [[(random.uniform(1, 0.5*temp) / temp) for j in range(factorNum)] for i in range(userNum)]
        qi = [[(0.5 * random.random() / temp) for j in range(factorNum)] for i in range(itemNum)]  # qi: random itemNum x factorNum matrix, entries in [0, 0.5/sqrt(factorNum))
        pu = [[(0.5 * random.random() / temp) for j in range(factorNum)] for i in range(userNum)]  # pu: random userNum x factorNum matrix, entries in [0, 0.5/sqrt(factorNum))
        print("initialization end\nstart training\n")
        preRmse = 1000000.0
        for step in range(186):
            fi = open(trainDataFile, 'r')
            for line in fi:
                prediction = 0
                arr = line.split()
                uid = int(arr[0].strip()) - 1
                iid = int(arr[1].strip()) - 1
                score = float(arr[2].strip())
                for iii in range(len(pu[uid])):
                    prediction += pu[uid][iii] * qi[iid][iii]
                #prediction = PredictScore(pu[uid], qi[iid])
                eui = score - prediction
                # update parameters
                for k in range(factorNum):
                    temp = pu[uid][k]  # attention: save the value of pu before updating
                    pu[uid][k] += learnRate * (eui * qi[iid][k] - regularization * pu[uid][k])
                    qi[iid][k] += learnRate * (eui * temp - regularization * qi[iid][k])
            fi.close()
            #learnRate *= 0.9
            #curRmse = Validate(testDataFile, averageScore, bu, bi, pu, qi)
        fo = open(modelSaveFile0, 'wb')
        fo.truncate()  # clear any previous file contents
        pickle.dump(qi, fo, True)
        pickle.dump(pu, fo, True)
        fo.close()
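# LFM above performs plain SGD matrix factorization on the explicit ratings:
#   e_ui = r_ui - p_u . q_i
#   p_u <- p_u + learnRate * (e_ui * q_i - regularization * p_u)
#   q_i <- q_i + learnRate * (e_ui * p_u - regularization * q_i)
# The pickled (qi, pu) factors are later loaded to build P_rating and serve as
# the rating-side signal in the hybrid BPR update.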
def UserNum(fileName1):
    fi = open(fileName1, 'r')
    usernum = 0
    result = 0
    for line in fi:
        arr = line.split('\t')
        usernum = int(arr[0].strip())
        if usernum > result:
            result = usernum
    fi.close()
    return result
def ItemNum(fileName1):
    fi = open(fileName1, 'r')
    itemnum = 0
    result = 0
    for line in fi:
        arr = line.split('\t')
        itemnum = int(arr[1].strip())
        if itemnum > result:
            result = itemnum
    fi.close()
    return result
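# Data format note (editor's addition): the main block below expects tab- or
# whitespace-separated MovieLens-style rating files with 1-based ids, one
#   user_id <tab> item_id <tab> rating [<tab> timestamp]
# record per line; the ML100K_number\\ paths are taken verbatim from the script.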
if __name__ == '__main__':
    trainDataFile = 'ML100K_number\\ML100K_train5.txt'
    testDataFile = 'ML100K_number\\ML100K_test5.txt'
    resultSaveFile = 'BPRMF_Result.txt'
    modelSaveFile = 'BPRMF_model_50.pkl'
    modelSaveFile0 = 'lfm_model.pkl'
    final_map = []
    final_mrr = []
    final_f1 = []
    final_pre = []
    final_recall = []
    userNum = max(UserNum(trainDataFile), UserNum(testDataFile))
    iteNum = max(ItemNum(trainDataFile), ItemNum(testDataFile))
    #V = np.zeros((943, 1682))
    #N = np.zeros((943, 1682))
    V = np.zeros((userNum, iteNum))
    N = np.zeros((userNum, iteNum))
    P_rating = np.zeros((userNum, iteNum))
    fi = open(trainDataFile, 'r')
    for line in fi:
        arr = line.split()
        uid = int(arr[0].strip()) - 1
        iid = int(arr[1].strip()) - 1
        tScore = float(arr[2].strip())
        V[uid, iid] = tScore
    fi.close()
    list_testusers = list()
    fi = open(testDataFile, 'r')
    for line in fi:
        arr = line.split()
        uid = int(arr[0].strip()) - 1
        iid = int(arr[1].strip()) - 1
        tScore = float(arr[2].strip())
        N[uid, iid] = tScore
        list_testusers.append(int(arr[0].strip()))
    fi.close()
    list_testusers = sorted(list(set(list_testusers)))
    read_file_without_scores(trainDataFile)
    LFM(testDataFile, trainDataFile, modelSaveFile0, userNum, iteNum)
    fi = open(modelSaveFile0, 'rb')
    qi = pickle.load(fi)
    pu = pickle.load(fi)
    fi.close()
    for i in range(userNum):
        for j in range(iteNum):
            P_rating[i, j] = sum([a*b for a, b in zip(pu[i], qi[j])])
    #print("final_map", final_map)
    #print("final_mrr", final_mrr)
    iterations = 5000
    print("trained LFM")
    BPRMF(trainDataFile, resultSaveFile, '\t', 10, iterations)
    # split validation and test sets
    # Check performance by plotting MAP and MRR over the evaluation checkpoints
    plt.figure(figsize=(10, 6))
    plt.plot(range(len(final_map)), final_map, marker='*', label='MAP')
    plt.plot(range(len(final_mrr)), final_mrr, marker='v', label='MRR')
    #plt.plot(range(len(final_f1)), final_f1, marker='*', label='F1')
    plt.title('ML-100K')
    plt.xlabel('Evaluation checkpoint')
    plt.ylabel('MAP/MRR')
    plt.legend(loc='upper left')
    plt.grid()
    plt.show()
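# Usage sketch (editor's assumption): run `python Rating_BPR.py` from a directory
# containing the ML100K_number\\ML100K_train5.txt / ML100K_test5.txt files; the
# script first fits the LFM baseline, then trains the hybrid BPR model and plots
# MAP/MRR over the recorded evaluation checkpoints.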
