Apri 알고리즘 Python 구현

2002 단어 python
#!C:/Python27/python.exe
#coding=gbk
import sys
__author__ = "junfeng_feng"
"""Python  Apri  
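input:  a text file with one transaction per line, items separated by tabs
output: the last non-empty level of frequent itemsets, mapped to their support counts

Usage: python Apri.py filename min_support
Example: python Apri.py data.txt 2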

3   
1、  Python  70    ,       Apri  
2、        (Python          )
3、    ,   

     :
1、     ,     
2、   apri,    

data.txt sample contents (tab-separated):
A	C	D
B	C	E
A	B	C	E
B	E
"""

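# Build the candidate 1-itemsets C1.
# return: dict with key = frozenset holding one item; value = number of occurrences of that item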
def getC1(srcdata):
    """Build the candidate 1-itemsets (C1) together with their counts.

    srcdata: iterable of transactions, each an iterable of hashable items.

    Returns a dict mapping ``frozenset([item])`` to the number of times the
    item occurs over all transactions. An item repeated inside a single
    transaction is counted once per occurrence.
    """
    counts = {}
    for row in srcdata:
        for element in row:
            # frozenset is hashable, so the one-item set can be a dict key.
            singleton = frozenset([element])
            counts[singleton] = counts.get(singleton, 0) + 1
    return counts

# return: the same dict with every entry below the support count removed
def getL(c, supct):
    """Drop every candidate whose count is below the minimum support.

    c:     dict mapping itemset -> support count; it is modified in place.
    supct: minimum support count; entries with ``count < supct`` are removed.

    Returns the very same dict object ``c`` after pruning.
    """
    # Gather the doomed keys first: a dict must not change size while it
    # is being iterated.
    infrequent = []
    for itemset, count in c.items():
        if count < supct:
            infrequent.append(itemset)
    for itemset in infrequent:
        c.pop(itemset)
    return c

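# Build the next candidate set C from the previous frequent set L,
# then scan the data and count how many transactions contain each candidate.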
def getnextcandi(preL, srcdata):
    """Generate the next level of candidates from preL and count support.

    preL:    dict (or any iterable) of frozenset itemsets that were frequent
             at the previous level.
    srcdata: iterable of transactions, each an iterable of hashable items.

    Join step: two itemsets of the same size are merged only when their
    union is exactly one item larger than the inputs. Merging arbitrary
    pairs would also produce candidates two or more items larger (for
    example {A,B} | {C,D}), which mixes itemset sizes within one level and
    wastes counting work.

    Returns a dict mapping each candidate frozenset to the number of
    transactions that contain it. Candidates with a count of 0 are kept;
    filtering by support is left to the caller.
    """
    candidates = {}
    for first in preL:
        size = len(first)
        for second in preL:
            if len(second) != size:
                continue
            merged = first | second
            # first == second gives len(merged) == size, so identical
            # pairs are rejected by this test as well.
            if len(merged) == size + 1:
                candidates[merged] = 0

    # Support counting: one pass over the data per call.
    for transaction in srcdata:
        # Convert once per transaction instead of once per candidate.
        basket = set(transaction)
        for itemset in candidates:
            if itemset.issubset(basket):
                candidates[itemset] += 1
    return candidates

# Apriori main routine
def Apriori(filename, supct):
    """Run the level-wise Apriori search over a transaction file.

    filename: path of a text file with one transaction per line and the
              items of a transaction separated by Tab characters.
    supct:    minimum support count. A string (for example a raw
              command-line argument) is converted with int().

    Returns the last non-empty level produced by the search as a dict
    mapping frozenset itemsets to their support counts, or an empty dict
    when no single item reaches the support count.

    Raises ValueError when supct is a string that is not an integer.
    """
    if isinstance(supct, str):
        # Comparing int counts against a str silently prunes everything on
        # Python 2 and raises TypeError on Python 3.
        supct = int(supct)

    # `with` closes the file even if parsing fails; blank lines are skipped
    # so they do not turn into a bogus empty-string item.
    with open(filename) as source:
        stripped = (line.strip() for line in source)
        srcdata = [row.split("\t") for row in stripped if row]

    frequent = {}
    candidates = getC1(srcdata)
    while True:
        survivors = getL(candidates, supct)
        if not survivors:
            break
        frequent = survivors
        # Derive the next candidate set from the level just accepted.
        candidates = getnextcandi(frequent, srcdata)
    return frequent

if __name__ == "__main__":
    if len(sys.argv) == 3:
        # Usage: python Apri.py filename min_support
        # sys.argv[0] is the script path itself, so the data file is
        # argv[1] and the support count is argv[2]; the latter arrives as
        # a string and must be an int to compare against item counts.
        print(Apriori(sys.argv[1], int(sys.argv[2])))
    else:
        # Wrong number of arguments: fall back to the built-in example.
        print(Apriori("awk.txt", 8))

좋은 웹페이지 즐겨찾기