추천 시스템 실전: 코드 구현

6065 단어 기계 학습
import random
import math
from operator import itemgetter

def Splitdata(data, M, k, seed):
    """Split (user, item) pairs into a train and a test mapping.

    Every pair is assigned a pseudo-random bucket in ``0 .. M-1``. Pairs that
    land in bucket ``k`` go to the test set, all others go to the train set,
    so roughly ``1/M`` of the data ends up in the test set.

    Args:
        data: iterable of ``(user, item)`` pairs.
        M: number of buckets (folds); must be at least 1.
        k: index of the bucket used as the test set, in ``0 .. M-1``.
        seed: seed for ``random`` so the split is reproducible.

    Returns:
        ``(train, test)``, two dicts mapping each user to a set of items.

    Raises:
        ValueError: if ``M`` is smaller than 1 (empty bucket range).
    """
    test = dict()
    train = dict()
    random.seed(seed)
    for user, item in data:
        # randint() includes BOTH endpoints, so the upper bound has to be
        # M - 1 to get exactly M buckets (0..M would give M + 1).
        target = test if random.randint(0, M - 1) == k else train
        target.setdefault(user, set()).add(item)
    return train, test

def Recall(train, test, N, K):
    """Return the recall of top-N user-based CF recommendations.

    For every train user that also appears in ``test``, the N highest-scored
    items returned by ``Recommend`` are compared with that user's test items.
    Recall is ``hits / total number of test items`` over those users.

    Args:
        train: dict mapping user -> set of items used to build the model.
        test: dict mapping user -> set of held-out items.
        N: length of the recommendation list per user.
        K: number of most similar users passed to ``Recommend``.

    Returns:
        Recall as a float; ``0.0`` when no train user has test items
        (previously this case raised ZeroDivisionError).
    """
    hit = 0
    total = 0  # named ``total`` so the builtin ``all`` is not shadowed
    W = UserSimilarity(train)

    for user in train:
        if user not in test:
            continue
        tu = test[user]
        rank = Recommend(user, train, W, K)
        top_n = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        hit += sum(1 for item, _ in top_n if item in tu)
        total += len(tu)

    if total == 0:
        return 0.0
    return hit / (total * 1.0)

def Precision(train, test, N, K):
    """Return the precision of top-N user-based CF recommendations.

    For every train user that also appears in ``test``, the N highest-scored
    items returned by ``Recommend`` are compared with that user's test items.
    Precision is ``hits / (N * number of evaluated users)``.

    Args:
        train: dict mapping user -> set of items used to build the model.
        test: dict mapping user -> set of held-out items.
        N: length of the recommendation list per user.
        K: number of most similar users passed to ``Recommend``.

    Returns:
        Precision as a float; ``0.0`` when nothing could be evaluated.
    """
    hit = 0
    total = 0  # named ``total`` so the builtin ``all`` is not shadowed
    W = UserSimilarity(train)

    # Iterate the dict itself: the original ``train.keys`` (no call
    # parentheses) is a bound method and is not iterable.
    for user in train:
        # Users without held-out items cannot be scored; skipping them
        # mirrors Recall and avoids a KeyError on test[user].
        if user not in test:
            continue
        tu = test[user]
        rank = Recommend(user, train, W, K)
        top_n = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        hit += sum(1 for item, _ in top_n if item in tu)
        total += N

    if total == 0:
        return 0.0
    return hit / (total * 1.0)

def Coverage(train, test, N, K):
    """Return the catalogue coverage of top-N user-based CF recommendations.

    Coverage is the number of distinct items that appear in any user's top-N
    list divided by the number of distinct items present in ``train``.

    Args:
        train: dict mapping user -> set of items.
        test: unused; kept so the signature matches the other metrics.
        N: length of the recommendation list per user.
        K: number of most similar users passed to ``Recommend``.

    Returns:
        Coverage as a float; ``0.0`` when ``train`` contains no items.
    """
    recommend_items = set()
    all_items = set()
    W = UserSimilarity(train)

    # Iterate the dict itself: the original ``train.keys`` (no call
    # parentheses) is a bound method and is not iterable.
    for user in train:
        all_items.update(train[user])
        rank = Recommend(user, train, W, K)
        top_n = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        recommend_items.update(item for item, _ in top_n)

    if not all_items:
        return 0.0
    return len(recommend_items) / (len(all_items) * 1.0)

def popularity(train, test, N, K):
    """Return the mean log-popularity of the recommended items.

    An item's popularity is the number of train users that have it. For each
    user's top-N list, ``log(1 + popularity)`` is accumulated and the sum is
    divided by the number of recommended items.

    Args:
        train: dict mapping user -> set of items.
        test: unused; kept so the signature matches the other metrics.
        N: length of the recommendation list per user.
        K: number of most similar users passed to ``Recommend``.

    Returns:
        Average log-popularity as a float; ``0.0`` when no item was
        recommended at all (previously this raised ZeroDivisionError).
    """
    item_popularity = dict()
    for items in train.values():
        for item in items:
            item_popularity[item] = item_popularity.get(item, 0) + 1

    ret = 0
    n = 0
    W = UserSimilarity(train)
    for user in train:
        rank = Recommend(user, train, W, K)
        top_n = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        for item, _ in top_n:
            ret += math.log(1 + item_popularity[item])
            n += 1

    if n == 0:
        return 0.0
    return ret / (n * 1.0)

def UserSimilarity(train):
    """Build the user-user similarity table from a user -> items mapping.

    Each item shared by two users contributes ``1 / log(1 + number of users
    of that item)`` to their co-occurrence weight, so widely shared items
    count less. The weight is then divided by ``sqrt(|items(u)| * |items(v)|)``.

    Args:
        train: dict mapping user -> set of items.

    Returns:
        Nested dict ``W[u][v]`` holding the similarity of ``u`` to ``v``.
        Users that share no item with anybody else get no row at all.
    """
    # Invert the mapping: item -> users that have it.
    users_by_item = {}
    for user, items in train.items():
        for item in items:
            users_by_item.setdefault(item, set()).add(user)

    co_weight = {}   # co_weight[u][v]: penalised co-occurrence of u and v
    item_count = {}  # item_count[u]: number of items user u has
    for audience in users_by_item.values():
        penalty = 1 / math.log(1 + len(audience))
        for u in audience:
            item_count[u] = item_count.get(u, 0) + 1
            for v in audience:
                if u != v:
                    row = co_weight.setdefault(u, {})
                    row[v] = row.get(v, 0) + penalty

    return {
        u: {
            v: cuv / math.sqrt(item_count[u] * item_count[v])
            for v, cuv in neighbours.items()
        }
        for u, neighbours in co_weight.items()
    }

def Recommend(user, train, W, K):
    """Score unseen items for ``user`` from the K most similar users.

    Args:
        user: the user to recommend for; must be a key of ``train``.
        train: dict mapping user -> set of items.
        W: nested dict ``W[u][v]`` of user-user similarities.
        K: number of most similar users to take into account.

    Returns:
        Dict mapping each candidate item (not already in ``train[user]``) to
        the summed similarity of the neighbours that have it. Empty when the
        user has no row in ``W``.

    Raises:
        KeyError: if ``user`` is not in ``train``.
    """
    rank = dict()
    interacted_items = train[user]
    # UserSimilarity only creates a row for users that share at least one
    # item with someone else, so a missing row means "no neighbours" rather
    # than an error.
    neighbours = sorted(
        W.get(user, {}).items(), key=itemgetter(1), reverse=True
    )[:K]
    for v, wuv in neighbours:
        for i in train[v]:
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv
    return rank


def ItemSimilarity(train):
    """Build the item-item similarity table from a user -> items mapping.

    Every user who has both items contributes ``1 / log(1 + number of items
    of that user)`` to the pair's co-occurrence weight, so very active users
    count less. The weight is then divided by ``sqrt(N[i] * N[j])`` where
    ``N[x]`` is the number of users that have item ``x``.

    Args:
        train: dict mapping user -> set of items.

    Returns:
        Nested dict ``W[i][j]`` holding the similarity of item ``i`` to
        item ``j``. Items that never co-occur with another item get no row.
    """
    co_weight = {}   # co_weight[i][j]: penalised co-occurrence of i and j
    item_count = {}  # item_count[i]: number of users that have item i
    for basket in train.values():
        for i in basket:
            item_count[i] = item_count.get(i, 0) + 1
            for j in basket:
                if i != j:
                    row = co_weight.setdefault(i, {})
                    row[j] = row.get(j, 0) + 1 / math.log(1 + len(basket) * 1.0)

    return {
        i: {
            j: cij / math.sqrt(item_count[i] * item_count[j])
            for j, cij in related.items()
        }
        for i, related in co_weight.items()
    }

def ItemCFRecommend(train, user_id, W, K):
    """Score unseen items for a user with item-based collaborative filtering.

    For each item the user already has, the K most similar items are looked
    up in ``W`` and their similarities are summed per candidate item.

    Args:
        train: dict mapping user -> set of items.
        user_id: the user to recommend for; must be a key of ``train``.
        W: nested dict ``W[i][j]`` of item-item similarities.
        K: number of most similar items considered per owned item.

    Returns:
        Dict mapping each candidate item (not already owned by the user) to
        its accumulated similarity score.

    Raises:
        KeyError: if ``user_id`` is not in ``train``.
    """
    rank = dict()
    ru = train[user_id]
    for i in ru:
        # ItemSimilarity only creates a row for items that co-occur with
        # another item, so an owned item may legitimately be missing from W.
        similar = sorted(W.get(i, {}).items(), key=itemgetter(1), reverse=True)[:K]
        for j, wj in similar:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + wj
    return rank

def RandomSelectNegativeSample(self, items):
    """Begin building a sample dict from ``items`` (implementation unfinished).

    Every key of ``items`` is stored in ``ret`` with the value 1. The loop
    after that is incomplete.

    NOTE(review): ``items_`` is not defined anywhere in the visible file, so
    the loop body raises NameError as soon as ``items`` is non-empty. There
    is no ``return`` statement, so the function returns None when ``items``
    is empty. The ``self`` parameter on a module-level function suggests this
    was copied from a class method - confirm the intended negative-sampling
    logic and owner before using it.
    """
    ret = dict()
    # Mark every item the caller passed in with 1.
    for i in items.keys():
        ret[i] = 1
    n = 0  # never updated in the visible code
    # Runs 3 * len(items) times; the body is truncated in this file.
    for i in range(0, len(items) * 3):
        item = items_

# Demo driver: read tab-separated records (presumably the MovieLens 100k
# ratings file - confirm the path for your machine), split them into
# train/test sets and print item-based recommendations for user '1'.
path = 'F:\\Project\\python\\ml-100k\\u.data'
data = []
# The context manager closes the file handle even if parsing fails; the
# original left it open for the lifetime of the process.
with open(path) as datalines:
    for line in datalines:
        arr = line.split('\t')
        # Only the first two columns are used, as the (user, item) pair.
        data.append((arr[0], arr[1]))

trn, tst = Splitdata(data, 10, 1, 10)
print(len(trn))
print(len(tst))
itemW = ItemSimilarity(trn)
rk = ItemCFRecommend(trn, '1', itemW, 5)
print(rk)

#
# recall = Recall(trn, tst, 100, 80)
# print('recall: ', recall)
# print 'recall: '+ recall
# Wtmp = UserSimilarity(trn)
# rk = Recommend('1', trn, Wtmp, 3)
# print(rk)

# t = dict()
# t['A'] = 1
# t['B'] = 2
# print(t)
# r = sorted(t.items(), key=itemgetter(1),reverse=True)
# print(r)
# for a1, a2 in r:
#     print(a1,a2)

좋은 웹페이지 즐겨찾기