정보 엔트로피 계산 (직접 작성한 Python 코드, 허접하니 고수분들은 그냥 지나가세요)
1296 단어 인공 지능
# -*- coding:utf-8 -*-
'''
Created on 2017 9 15
@author: snow
'''
import csv
import math
from collections import Counter
# Path of the CSV data set loaded by allData().
fileName = "AllElectronics.csv"
def allData(path=None):
    """Load a CSV data set and split it into headers, rows, labels and features.

    Args:
        path: CSV file to read. When omitted, the module-level ``fileName``
            is used, which keeps the original zero-argument call working.

    Returns:
        A 4-tuple ``(headers, dataContent, labels, dataSet)`` where
        ``headers`` is the first CSV row, ``dataContent`` is the list of the
        remaining (non-blank) rows, ``labels`` holds the last column of each
        data row, and ``dataSet`` holds each data row with its first and last
        columns removed.

    Raises:
        IndexError: if the file contains no non-blank rows.
        OSError: if the file cannot be opened.
    """
    if path is None:
        path = fileName

    # The context manager closes the file even if parsing fails; newline=""
    # is what the csv module expects so quoted line breaks survive intact.
    with open(path, encoding="UTF-8", newline="") as csv_file:
        # csv.reader yields [] for blank lines; drop them so a trailing empty
        # line does not break the last-column lookup below.
        fileContent = [row for row in csv.reader(csv_file) if row]

    headers = fileContent[0]
    # Keep the complete rows here (not just their last cell) so that the
    # feature slice below works on columns rather than on label characters.
    dataContent = fileContent[1:]
    labels = [row[-1] for row in dataContent]
    # Strip the first column (presumably a record id - confirm against the
    # CSV) and the trailing class label, leaving only the feature values.
    dataSet = [row[1:-1] for row in dataContent]
    return headers, dataContent, labels, dataSet
# Read the data set once at import time and remember how many labelled
# records it holds.
headers, dataContent, labels, dataSet = allData()
numEntries = len(labels)
def calEnt(labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Args:
        labels: iterable of hashable class labels, one per record.

    Returns:
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
        fraction of records carrying that label. An empty input yields 0.0.

    Side effects:
        Prints the number of occurrences of each distinct label, in
        first-seen order.
    """
    labelCounts = Counter(labels)
    # Normalise by the size of the argument itself rather than by the
    # module-level numEntries, so the result is also correct for any subset
    # of the data set.
    total = sum(labelCounts.values())

    shannonEnt = 0.0
    for count in labelCounts.values():
        print(count)
        prob = count / total
        # log2 is exact for powers of two, unlike log(prob, 2).
        shannonEnt -= prob * math.log2(prob)
    return shannonEnt
# Entropy of the class-label column of the loaded data set.
res = calEnt(labels)
print(res)