# [web page residue, not part of the program] Code merge finished; the page will refresh automatically.
#Ver1.0
#Zero @2012.5.2
#
# -*- coding: utf-8 -*-
import math
import random
import pickle
from decimal import *
import numpy as np
def read_file_without_scores(file_to_read, space_type='\t'):
    """Parse a ``user<sep>item[<sep>...]`` interaction file, ignoring any score column.

    Blank lines are skipped. The first two fields of every other line are
    converted with ``int``; further fields are not read.

    Args:
        file_to_read: Path of the text file to parse.
        space_type: Field separator passed to ``str.split`` (tab by default).

    Returns:
        A 7-tuple ``(dict_items, dict_not_items, list_users, list_items,
        cont, num_users, num_items)`` where

        * ``dict_items`` maps each user to the list of items on that user's
          lines, in file order (duplicates are kept),
        * ``dict_not_items`` maps each user to a list of the items seen in the
          file that the user did not interact with,
        * ``list_users`` / ``list_items`` are the sorted distinct ids,
        * ``cont`` is the number of non-blank lines,
        * ``num_users`` / ``num_items`` are ``len(list_users)`` / ``len(list_items)``.

    Raises:
        ValueError: If one of the first two fields is not an integer.
        IndexError: If a non-blank line has fewer than two fields.
    """
    dict_items = dict()
    seen_users = set()
    seen_items = set()
    cont = 0

    # The context manager closes the file even when a malformed line raises.
    with open(file_to_read, 'r') as fi:
        for line in fi:
            if not line.strip():
                continue
            cont += 1
            inline = line.split(space_type)
            user = int(inline[0])
            item = int(inline[1])
            seen_users.add(user)
            seen_items.add(item)
            dict_items.setdefault(user, []).append(item)

    # Distinct ids in ascending order.
    list_users = sorted(seen_users)
    list_items = sorted(seen_items)

    # Build the full item set once instead of once per user.
    all_items = set(list_items)
    dict_not_items = dict()
    for user in list_users:
        # Items present in the file that this user has not interacted with.
        dict_not_items[user] = list(all_items - set(dict_items[user]))

    return dict_items, dict_not_items, list_users, list_items, cont, len(list_users), len(list_items)
__author__ = 'Arthur Fortes'
learn_rate = 0.01
learn_rate1= 0.01
regularization = 1
reg_u = 0.0025
reg_i = 0.0025
reg_j = 0.00025
reg_bias = 0
getcontext().prec = 20
#number_int = 15000
number_int = 70000
def create_bias(bias_size):
    """Return ``bias_size`` bias terms drawn uniformly from ``[0, 1)``.

    Args:
        bias_size: Size (int or shape tuple) forwarded to
            ``np.random.uniform``.

    Returns:
        A NumPy array of the requested size.
    """
    return np.random.uniform(0, 1, bias_size)
def create_factors(num_users, num_items, factors):
    """Create randomly initialised latent-factor matrices.

    Args:
        num_users: Number of rows of the user matrix.
        num_items: Number of rows of the item matrix.
        factors: Number of latent factors (columns of both matrices).

    Returns:
        ``(users_factors, items_factors)`` with shapes
        ``(num_users, factors)`` and ``(num_items, factors)``; every entry is
        drawn uniformly from ``[0, 1)``.
    """
    # The user matrix is drawn first so a seeded generator reproduces the
    # same values as before.
    users_factors = np.random.uniform(0, 1, [num_users, factors])
    items_factors = np.random.uniform(0, 1, [num_items, factors])
    return users_factors, items_factors
class BPRMF(object):
    """Matrix factorisation trained with pairwise (BPR-style) SGD updates.

    Constructing an instance reads the training file, initialises the
    parameters and immediately runs the whole training loop.

    Besides its own attributes the class reads these module-level names,
    which the script section of this file defines before instantiating it:
    ``learn_rate``, ``reg_u``, ``reg_i``, ``reg_j``, ``reg_bias``,
    ``number_int``, ``modelSaveFile``, ``modelSaveFile0``, ``trainDataFile``,
    ``testDataFile`` and the rating matrices ``N``, ``T`` and ``Rank_LFM``.
    User and item ids are treated as 1-based row/column numbers of those
    matrices.

    NOTE(security): the model files are read with ``pickle.load``, which can
    execute arbitrary code; only load files you produced yourself.
    """

    def __init__(self, file_to_train, file_to_write, space_type='\t', num_factors=10, num_interactions=30):
        """Load the training data, initialise the model and train it.

        Args:
            file_to_train: Interaction file parsed by ``read_file_without_scores``.
            file_to_write: Output path used by ``predict_bprmf``.
            space_type: Field separator of ``file_to_train``.
            num_factors: Number of latent factors.
            num_interactions: Number of training epochs run by ``train_bprmf``.
        """
        self.file_to_train = file_to_train
        self.file_to_write = file_to_write
        self.num_factors = num_factors
        self.num_interactions = num_interactions

        (self.dict_items, self.dict_not_items, self.list_users, self.list_items,
         self.num_int, self.num_users, self.num_items) = read_file_without_scores(file_to_train, space_type)
        self._build_index_maps()

        self.bias_items = create_bias(self.num_items)
        self.u_factors, self.i_factors = create_factors(self.num_users, self.num_items, num_factors)

        print("Training data: {} users | {} items | {} interactions...".format(
            self.num_users, self.num_items, self.num_int))
        print("BPRMF num_factors={} | bias_reg={} | reg_u={} | reg_i={} | reg_j={} | learn rate= {}".format(
            num_factors, reg_bias, reg_u, reg_i, reg_j, learn_rate))

        self.train_bprmf()
        print('Trained')

    # ------------------------------------------------------------------ helpers

    def _build_index_maps(self):
        """Cache id -> row lookups so sampling avoids linear ``list.index`` scans."""
        self._user_index = {user: row for row, user in enumerate(self.list_users)}
        self._item_index = {item: row for row, item in enumerate(self.list_items)}

    @staticmethod
    def _load_lfm_model():
        """Load ``(qi_lfm, pu_lfm)`` from ``modelSaveFile0`` (item factors are stored first)."""
        # NOTE(security): pickle.load must only be used on trusted files.
        with open(modelSaveFile0, 'rb') as fi:
            qi_lfm = pickle.load(fi)
            pu_lfm = pickle.load(fi)
        return qi_lfm, pu_lfm

    @staticmethod
    def _iter_ratings(path):
        """Yield ``(uid, iid, score)`` per non-blank line of ``path``; ids are shifted to 0-based."""
        with open(path, 'r') as fi:
            for line in fi:
                arr = line.split()
                if not arr:
                    continue
                yield int(arr[0]) - 1, int(arr[1]) - 1, int(arr[2])

    @staticmethod
    def _descending_ranks(scores):
        """Return the 1-based rank of every entry when sorted by descending score.

        A stable sort is used, so tied scores keep ascending index order, the
        same order ``sorted(..., reverse=True)`` produces.
        """
        order = np.argsort(-np.asarray(scores), kind='stable')
        ranks = np.empty(len(order), dtype=int)
        ranks[order] = np.arange(1, len(order) + 1)
        return ranks

    def _dense_model(self, n_users, n_items):
        """Expand the trained parameters to dense, id-ordered form.

        Row ``k`` of the result belongs to id ``k + 1``; ids that never
        occurred in the training file keep all-zero factors and zero bias.

        Returns:
            ``(X, Y, bias_i)``: list of user vectors, list of item vectors and
            the item-bias array.
        """
        X = [np.zeros(self.num_factors) for _ in range(n_users)]
        Y = [np.zeros(self.num_factors) for _ in range(n_items)]
        bias_i = np.zeros(n_items)
        for user, u in self._user_index.items():
            if 1 <= user <= n_users:
                X[user - 1] = self.u_factors[u]
        for item, i in self._item_index.items():
            if 1 <= item <= n_items:
                Y[item - 1] = self.i_factors[i]
                bias_i[item - 1] = self.bias_items[i]
        return X, Y, bias_i

    # ----------------------------------------------------------------- training

    def sample_triple(self):
        """Draw one training triple.

        Returns:
            ``(user, item, user_id, item_id, other_item_id)``: the raw user
            and item ids followed by the row indices of the user, of an item
            the user interacted with and of an item the user did not.

        Raises:
            IndexError: If the sampled user has no non-interacted item.
        """
        # Draw order (user, item, other item) is kept so a seeded ``random``
        # produces the same sequence as before.
        user = random.choice(self.list_users)
        item = random.choice(self.dict_items[user])
        other_item = random.choice(self.dict_not_items[user])
        return (user, item, self._user_index[user],
                self._item_index[item], self._item_index[other_item])

    def train_bprmf(self):
        """Run ``num_interactions`` epochs, evaluating after each one.

        NOTE(review): the evaluation calls are assumed to sit inside the epoch
        loop; the flattened source does not show their nesting.
        """
        for i in range(self.num_interactions):
            self.iterate_bprmf()
            print("i, rui=bpr-rmse, BPR", i)
            self.predict()
            self.predict_bprmf1()

    def iterate_bprmf(self):
        """Perform ``number_int`` sampled SGD updates.

        Returns:
            ``(u_factors, i_factors, bias_items)`` after the updates.
        """
        for _ in range(number_int):
            user, item, user_id, item_id, other_item_id = self.sample_triple()
            self.update_factors_bprmf(user, item, user_id, item_id, other_item_id)
        return self.u_factors, self.i_factors, self.bias_items

    def update_factors_bprmf(self, user, item, u, i, j):
        """Apply one SGD step for the triple ``(u, i, j)``.

        Args:
            user: Raw user id (unused; kept for interface compatibility).
            item: Raw item id (unused; kept for interface compatibility).
            u: Row of the user in ``u_factors``.
            i: Row of the sampled interacted item.
            j: Row of the sampled non-interacted item.
        """
        # Snapshot the rows: every update below must use the pre-step values.
        w_u = self.u_factors[u].copy()
        h_i = self.i_factors[i].copy()
        h_j = self.i_factors[j].copy()

        rui = self.bias_items[i] + np.dot(w_u, h_i)
        ruj = self.bias_items[j] + np.dot(w_u, h_j)
        x_uij = rui - ruj

        # exp(-x) / (1 + exp(-x)); the guard avoids math.exp overflow for
        # very negative x, where the ratio is 1 to machine precision.
        if x_uij < -700:
            fun_exp = 1
        else:
            temp = math.exp(-x_uij)
            fun_exp = float(temp) / float(1.0 + temp)

        self.bias_items[i] += learn_rate * (fun_exp - reg_bias * self.bias_items[i])
        self.bias_items[j] += learn_rate * (-fun_exp - reg_bias * self.bias_items[j])

        self.u_factors[u] = w_u + learn_rate * ((h_i - h_j) * fun_exp - reg_u * w_u)
        self.i_factors[i] = h_i + learn_rate * (w_u * fun_exp - reg_i * h_i)
        self.i_factors[j] = h_j + learn_rate * (-w_u * fun_exp - reg_j * h_j)

    # --------------------------------------------------------------- prediction

    def predict_bprmf(self):
        """Write each user's 10 best non-interacted items to ``file_to_write``.

        Every output line is ``user<TAB>item<TAB>score``, ordered by
        descending score within a user.
        """
        final_rank = list()
        for user in self.list_users:
            u = self._user_index[user]
            score_items = list()
            for item in self.dict_not_items[user]:
                i = self._item_index[item]
                rui = self.bias_items[i] + np.dot(self.u_factors[u], self.i_factors[i])
                # Store the item id, not its internal row: the user column
                # holds raw ids too and predict_bprmf1 ranks by item id.
                score_items.append([item, float(rui)])
            # Sort by descending score.
            score_items.sort(key=lambda pair: -pair[1])
            final_rank.append([user, score_items[:10]])

        print('Writing ranks...')
        with open(self.file_to_write, 'w') as outfile:
            for user, ranked in final_rank:
                for item, score in ranked:
                    outfile.write(str(user) + '\t' + str(item) + '\t' + str(score) + '\n')

    def predict(self):
        """Export the model, rank the test items and report RMSE/MAE.

        Steps:
            1. Pickle ``bias_i``, ``X`` (user vectors) and ``Y`` (item
               vectors), in that order, to ``modelSaveFile``.
            2. Fill ``T`` with this model's scores and ``Rank_LFM`` with the
               scores of the model loaded from ``modelSaveFile0``; for items
               with a non-zero entry in ``N`` the score is replaced by the
               item's 1-based rank within the user's row.
            3. For every line of ``testDataFile`` blend a rank-derived value
               with the loaded model's score and accumulate the errors.

        Returns:
            The RMSE over the test file.
        """
        n_users, n_items = T.shape

        X, Y, bias_i = self._dense_model(n_users, n_items)
        with open(modelSaveFile, 'wb') as fi:
            pickle.dump(bias_i, fi, True)
            pickle.dump(X, fi, True)
            pickle.dump(Y, fi, True)
        print("model generation over")

        qi_lfm, pu_lfm = self._load_lfm_model()
        # assumes the loaded factors are (users x f) and (items x f) — TODO confirm
        pu_arr = np.asarray(pu_lfm)
        qi_arr = np.asarray(qi_lfm)[:n_items]
        X_arr = np.asarray(X)
        Y_arr = np.asarray(Y)

        for i in range(n_users):
            bpr_scores = bias_i + Y_arr.dot(X_arr[i])
            lfm_scores = qi_arr.dot(pu_arr[i])
            T[i, :] = bpr_scores
            Rank_LFM[i, :] = lfm_scores

            test_items = np.flatnonzero(N[i] != 0)
            if test_items.size:
                # Only test items get their score replaced by a rank.
                T[i, test_items] = self._descending_ranks(bpr_scores)[test_items]
                Rank_LFM[i, test_items] = self._descending_ranks(lfm_scores)[test_items]

        RMSE = 0
        MAE = 0
        cnt = 0
        # These counters are never incremented; they are kept so the printed
        # line keeps its original layout.
        positive_num = 0
        negative_num = 0
        for uid, iid, score in self._iter_ratings(testDataFile):
            cnt += 1
            prediction0 = np.dot(pu_arr[uid], qi_arr[iid])
            # Map rank 1 .. n_items onto 4 .. 0.
            prediction1 = 4 * (n_items - T[uid, iid]) / (n_items - 1)
            if prediction0 < 1:
                prediction0 = 1
            # Agreement of the two rankings in (0, 1]; 1 means identical rank.
            xis = min(T[uid, iid], Rank_LFM[uid, iid]) / max(T[uid, iid], Rank_LFM[uid, iid])
            prediction = (0.5 * xis) * prediction1 + (1 - 0.5 * xis) * prediction0
            eui = score - prediction
            MAE += abs(eui)
            RMSE += eui * eui

        MAE /= cnt
        RMSE = math.sqrt(RMSE / cnt)
        print("RMSE, MAE:", positive_num, negative_num, RMSE, MAE)
        return RMSE

    def predict_bprmf1(self):
        """Evaluate the top-5 ranking quality against the test matrix ``N``.

        For each training user, the relevant items are the (at most) five
        highest-rated items of that user's row in ``N``. Candidates are the
        user's non-interacted training items, scored with the sum of this
        model's factors and the factors loaded from ``modelSaveFile0``.
        Users without any positive test rating are skipped.

        Returns:
            ``(MRR, PRE, MAP)``: mean reciprocal rank of the first relevant
            item, mean precision@5, and the mean of the per-user average of
            precision-at-hit over the five top positions.
        """
        top_k = 5
        pre_5 = []
        map_5 = []
        mrr = []

        qi_lfm, pu_lfm = self._load_lfm_model()
        n_items = N.shape[1]

        ka = 0  # users skipped because they have no positive test rating
        for user in self.list_users:
            test_row = N[user - 1]
            rated = [[j + 1, test_row[j]] for j in range(n_items) if test_row[j] > 0]
            rated.sort(key=lambda pair: -pair[1])
            relevant = {pair[0] for pair in rated[:top_k]}
            if not relevant:
                ka += 1
                continue

            u = self._user_index[user]
            user_vec = self.u_factors[u] + pu_lfm[user - 1]
            score_items = []
            for item in self.dict_not_items[user]:
                i = self._item_index[item]
                rui = self.bias_items[i] + np.dot(user_vec, self.i_factors[i] + qi_lfm[item - 1])
                score_items.append((item, rui))
            ranking = sorted(score_items, key=lambda s: s[1], reverse=True)

            # Reciprocal rank of the first relevant item (0 when none is
            # ranked). NOTE(review): the original loop never scanned past the
            # first non-relevant item, so it did not produce this value.
            reciprocal = 0
            for rank, (item, _) in enumerate(ranking):
                if item in relevant:
                    reciprocal = 1.0 / (rank + 1)
                    break
            mrr.append(reciprocal)

            # Precision / AP are only recorded when at least top_k candidates
            # exist, as before.
            if len(ranking) >= top_k:
                true_num = 0
                ap = []
                for rank, (item, _) in enumerate(ranking[:top_k]):
                    if item in relevant:
                        true_num += 1
                        ap.append(true_num / (rank + 1))
                    else:
                        ap.append(0)
                pre_5.append(true_num / top_k)
                map_5.append(sum(ap) / len(ap) if true_num else 0)

        PRE = np.mean(pre_5) if pre_5 else 0
        MAP = np.mean(map_5) if sum(map_5) != 0 else 0
        MRR = np.mean(mrr) if mrr else 0
        print("MRR,PRE, MAP", ka, len(mrr), len(pre_5), len(map_5), MRR, PRE, MAP)
        return (MRR, PRE, MAP)

    def predict_score(self):
        """Print MAE/RMSE of the pickled model on the train and test files.

        The model is read back from ``modelSaveFile``. Test predictions are
        rescaled by ``5 / Score_max`` (``Score_max`` being the largest train
        prediction, floored at 0) and clamped below at 1; the loaded
        ``modelSaveFile0`` scores are clamped above at 5.
        """
        # NOTE(security): pickle.load must only be used on trusted files.
        with open(modelSaveFile, 'rb') as fi:
            bias_i = pickle.load(fi)
            pu = pickle.load(fi)
            qi = pickle.load(fi)
        qi_lfm, pu_lfm = self._load_lfm_model()

        def bpr_score(uid, iid):
            return bias_i[iid] + np.dot(pu[uid], qi[iid])

        # Train pass 1: largest predicted score, used to rescale test scores.
        Score_max = 0
        for uid, iid, _ in self._iter_ratings(trainDataFile):
            pScore = bpr_score(uid, iid)
            if pScore > Score_max:
                Score_max = pScore

        # Train pass 2: error of the raw (unscaled) predictions.
        step = 0
        cnt_train = 0
        rmse_train = 0
        MAE_train = 0
        for uid, iid, tScore in self._iter_ratings(trainDataFile):
            cnt_train += 1
            eui = tScore - bpr_score(uid, iid)
            MAE_train += abs(eui)
            rmse_train += eui * eui
        MAE_train /= cnt_train
        rmse_train = math.sqrt(rmse_train / cnt_train)
        print("step,MAE_train,rmse_train %d: %f: %f:" % (step, MAE_train, rmse_train))

        # Test pass.
        MAE = 0
        rmse = 0
        rmse_av = 0
        cnt = 0
        for uid, iid, tScore in self._iter_ratings(testDataFile):
            cnt += 1
            pScore_lfm = np.dot(pu_lfm[uid], qi_lfm[iid])
            pScore = bpr_score(uid, iid) * 5 / Score_max
            if pScore < 1:
                pScore = 1
            if pScore_lfm > 5:
                pScore_lfm = 5
            eui = tScore - pScore
            rmse += eui * eui
            rmse_av += (tScore - pScore_lfm) * (tScore - pScore_lfm)
            MAE += abs(eui)
        MAE /= cnt
        rmse = math.sqrt(rmse / cnt)
        rmse_av = math.sqrt(rmse_av / cnt)
        print("MAE, rmse_test, rmse_av %f: %f: %f: " % (MAE, rmse, rmse_av))
if __name__ == '__main__':
    # Script entry point: load the train/test rating matrices into the
    # module-level arrays that BPRMF reads, then train a model (training
    # starts inside the BPRMF constructor).
    trainDataFile = 'movielens\\u1.base'
    testDataFile = 'movielens\\u1.test'
    resultSaveFile = 'BPRMF_Result.txt'
    modelSaveFile = 'BPRMF_model_50.pkl'    # written by BPRMF.predict
    modelSaveFile0 = 'LFM_model_50.pkl'     # must already exist; read by BPRMF

    # Dense user x item matrices; ids in the data files are 1-based.
    V = np.zeros((943, 1682))         # training ratings
    N = np.zeros((943, 1682))         # test ratings
    T = np.zeros((943, 1682))         # filled by BPRMF.predict
    Rank_LFM = np.zeros((943, 1682))  # filled by BPRMF.predict

    for path, matrix in ((trainDataFile, V), (testDataFile, N)):
        # The context manager closes the file even if a line fails to parse.
        with open(path, 'r') as fi:
            for line in fi:
                arr = line.split()
                if not arr:
                    continue  # tolerate blank lines
                uid = int(arr[0]) - 1
                iid = int(arr[1]) - 1
                matrix[uid, iid] = int(arr[2])

    # Per-user number of training ratings and their mean (0 for users
    # without any training rating).
    Num_train = (V != 0).sum(axis=1).astype(float)
    average_u = np.divide(V.sum(axis=1), Num_train, out=np.zeros(943), where=Num_train > 0)

    #read_file_without_scores(trainDataFile)
    BPRMF(trainDataFile, resultSaveFile, '\t', 50, 500)
# Split the validation set and the test set
# [web page residue, not part of the program] You can leave a comment after logging in.
# [web page residue] Inappropriate content may be displayed here and will not be shown on the page. You can review and change it with the corresponding edit function.
# [web page residue] If you confirm that the content contains no obscene language, advertising redirects, violence, vulgar pornography, violations, piracy, false, trivial or illegal content under national laws and regulations, you can click "Submit" to file an appeal, and we will process it as soon as possible.
# [web page residue] Publish ( 0 )