사전류 데이터 추천 시스템 설계

1350 단어
# Pearson correlation similarity between two users' ratings
def pearson_sim(target_user,other_user,data):
    """Return the Pearson correlation between two users' ratings.

    Only items present in both ``data[target_user]`` and
    ``data[other_user]`` contribute. The coefficient is computed with the
    single-pass formula

        (n*Sxy - Sx*Sy) / (sqrt(n*Sxx - Sx**2) * sqrt(n*Syy - Sy**2))

    Args:
        target_user: key into ``data`` for the first user.
        other_user: key into ``data`` for the second user.
        data: mapping of user -> mapping of item -> numeric rating.

    Returns:
        The correlation coefficient, or ``0`` when the denominator is zero
        (no shared items, or one side's shared ratings are all equal).

    Raises:
        KeyError: if either user is not a key of ``data``.
    """
    from math import sqrt

    theirs = data[other_user]
    mine = data[target_user]

    # Running totals over the items both users have rated.
    shared = 0
    total_a = 0
    total_b = 0
    squares_a = 0
    squares_b = 0
    cross = 0
    for item, a in theirs.items():
        if item not in mine:
            continue
        b = mine[item]
        cross += a * b
        total_a += a
        total_b += b
        shared += 1
        squares_a += a ** 2
        squares_b += b ** 2

    numerator = shared * cross - total_a * total_b
    spread_a = sqrt(shared * squares_a - total_a ** 2)
    spread_b = sqrt(shared * squares_b - total_b ** 2)
    denominator = spread_a * spread_b

    # A zero denominator means the correlation is undefined; report 0.
    if denominator == 0:
        return 0
    return numerator / denominator
# Find the top-k most similar users (nearest neighbors)
def get_neighbor(data, target_user, k=3):
    """Return the ``k`` users most similar to ``target_user``.

    Every other user in ``data`` is scored against ``target_user`` with
    ``pearson_sim``.

    Args:
        data: mapping of user -> mapping of item -> numeric rating.
        target_user: the user whose neighbors are wanted; excluded from
            the result.
        k: maximum number of neighbors to return (default 3).

    Returns:
        A list of at most ``k`` ``(similarity, user)`` tuples in descending
        order. Tuples are compared as a whole, so ties on similarity are
        ordered by user, descending.
    """
    sim = []
    for user in data:
        if user != target_user:
            # Score each candidate against the target user. The previous
            # code passed a hard-coded ' ' key here, which ignored
            # target_user and raised KeyError on ordinary data.
            s = pearson_sim(target_user, user, data)
            sim.append((s, user))
    # Highest similarity first, then keep the top k.
    sim.sort(reverse=True)
    return sim[:k]
# Recommend items to the target user based on its nearest neighbors
def rec(target_user, data):
    """Recommend items that ``target_user`` has not rated yet.

    Candidates are the items rated by the nearest neighbors (as returned by
    ``get_neighbor`` with its default ``k``) that are absent from
    ``data[target_user]``.

    Args:
        target_user: the user to recommend for.
        data: mapping of user -> mapping of item -> numeric rating.

    Returns:
        A list of unique ``(item, rating)`` tuples ordered by rating,
        highest first. ``rating`` is the neighbor's own rating of the item.
    """
    neighbor = get_neighbor(data, target_user)

    # Collect every neighbor-rated item the target user has not rated.
    candidates = []
    for _, user in neighbor:
        for movie, rating in data[user].items():
            if movie not in data[target_user]:
                candidates.append((movie, rating))

    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)

    # De-duplicate while keeping the ranking. The previous list(set(...))
    # discarded the sort order, so results came back in arbitrary order.
    # NOTE: only identical (item, rating) pairs are merged, as before; an
    # item rated differently by two neighbors still appears twice.
    seen = set()
    final_rec = []
    for pair in ranked:
        if pair not in seen:
            seen.add(pair)
            final_rec.append(pair)
    return final_rec

좋은 웹페이지 즐겨찾기