데이터 마 이 닝 의 FP - tree 알고리즘
이 논문 은 Jiawei Han 의 대작 으로 또 한 명의 중국인 이 북 미 에 갔다.
그래 픽 인터페이스 프로젝트 + 테스트 용례 스탬프 여기http://download.csdn.net/detail/michealtx/4266155
콘 솔 버 전 C + + 코드 는 다음 과 같 습 니 다.
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <list>
#include <string>
#include <algorithm>
#include <ctime>
using namespace std;
struct FPNode
{
string nodeName;
unsigned int count;
FPNode *parentNode;
vector<FPNode *> childNode;
FPNode *siblingNode;
bool operator < (const FPNode &other) const
{
return count>other.count;
}
};
// database ,fileName char* , string ,in()
bool ObtainDatabase(vector<vector<string> > &database,char *fileName)
{
/* vector<string> data;
data.push_back(1);data.push_back(2);data.push_back(5);
database.push_back(data);
data.clear();
data.push_back(2);data.push_back(4);
database.push_back(data);
data.clear();
data.push_back(2);data.push_back(3);
database.push_back(data);
data.clear();
data.push_back(1);data.push_back(2);data.push_back(4);
database.push_back(data);
data.clear();
data.push_back(1);data.push_back(3);
database.push_back(data);
data.clear();
data.push_back(2);data.push_back(3);
database.push_back(data);
data.clear();
data.push_back(1);data.push_back(3);
database.push_back(data);
data.clear();
data.push_back(1);data.push_back(2);data.push_back(3);data.push_back(5);
database.push_back(data);
data.clear();
data.push_back(1);data.push_back(2);data.push_back(3);
database.push_back(data);
return true;
*/
ifstream in(fileName);
if(!in)
{
cout<<" !"<<endl;
return false;
}
string s="";
int i=0;
while(getline(in,s))
{//
i++;
vector<string> transaction;
int len=s.length();
string str="";
for(int i=0;i<len;i++)
{//
if(s[i]!=' ')
{
str+=s[i];
}
else if(s[i]==' '||i==len-1)
{
transaction.push_back(str);
str="";
}
}
database.push_back(transaction);
s="";
}cout<<i;system("pause");
return true;
}
///////////////////////////////////////////
// , 1-
void CreateItemset(vector<vector<string> > &database,vector<FPNode> &largeItemset,unsigned int minSupport)
{
map<string,int> dir;
map<string,int>::iterator dirIt;
vector<vector<string> >::iterator databaseIt;
vector<string> temp;
vector<string>::iterator tempIt;
// , <item,count>
for(databaseIt=database.begin();databaseIt!=database.end();databaseIt++)
{
temp=*databaseIt;
for(tempIt=temp.begin();tempIt!=temp.end();tempIt++)
{
string item=*tempIt;
dirIt=dir.find(item);
if(dirIt==dir.end())
{//item dir
dir.insert(pair<string,int>(item,1));
}
else
{//item dir , count 1
(dirIt->second)++;
}
}
}
// minSopport item
for(dirIt=dir.begin();dirIt!=dir.end();dirIt++)
{
if(dirIt->second>=minSupport)
{
FPNode large;
large.nodeName=dirIt->first;
large.count=dirIt->second;
large.parentNode=NULL;
(large.childNode).clear();
large.siblingNode=NULL;
largeItemset.push_back(large);
}
}
// count
sort(largeItemset.begin(),largeItemset.end());
}
//
void OutputLargeItemset(vector<FPNode> &largeItemset,unsigned int i)
{
cout<<" "<<largeItemset.size()<<" "<<i<<"- :"<<endl;
vector<FPNode>::iterator largeItemsetIt;
int j=0;
for(largeItemsetIt=largeItemset.begin();largeItemsetIt!=largeItemset.end();largeItemsetIt++)
{
FPNode temp=*largeItemsetIt;
cout<<"{ ";
cout<<temp.nodeName<<" : "<<temp.count;
cout<<" }";
j++;
if(j%4==0)
{
cout<<endl;
}
}
cout<<endl<<endl;
}
//////////////////////////////////////////////////////////
// trans 1- large1 , 1- large1 , tempTrans
void SortItem(vector<string> &trans,vector<string> &tempTrans,vector<FPNode> &large1)
{
unsigned int sizeLarge=large1.size();
unsigned int sizeTrans=trans.size();
for(int i=0;i<sizeLarge;i++)
{
FPNode tempNode=large1[i];
for(int j=0;j<sizeTrans;j++)
{
if(tempNode.nodeName==trans[j])
{
tempTrans.push_back(tempNode.nodeName);
}
}
}
}
// transTemp[k] root
int AddNode(FPNode *parentNode,vector<string> &transTemp,unsigned int k,vector<FPNode> &large1)
{
unsigned int size=transTemp.size();
if(size>0&&(k>=0&&k<size))
{//cout<<"addNode :"<<parentNode->nodeName<<endl;
FPNode *nodeTemp=new FPNode;//
if(nodeTemp==NULL)
{
return 1;
}
nodeTemp->nodeName=transTemp[k];
nodeTemp->count=1;
nodeTemp->parentNode=parentNode;
nodeTemp->siblingNode=NULL;
(parentNode->childNode).push_back(nodeTemp);
// nodeTemp 。
unsigned int sizeLarge=large1.size();
for(int i=0;i<sizeLarge;i++)
{
if((large1[i]).nodeName==nodeTemp->nodeName)
{
FPNode *temp=&(large1[i]);
while(temp->siblingNode!=NULL)
{
temp=temp->siblingNode;
}
temp->siblingNode=nodeTemp;//cout<<"brother";system("pause");
break;
}
}
k++;
if(k<size)
{//
AddNode(nodeTemp,transTemp,k,large1);
}
}
return 0;
}
// FP-Tree
int CreateFPTree(vector<vector<string> > &database,vector<FPNode> &large1,FPNode **treeRoot)
{
*treeRoot=new FPNode;
FPNode *root=*treeRoot;
if(root==NULL)
{
return 1;
}
root->nodeName="-1";//-1
root->count=0;
root->parentNode=NULL;
(root->childNode).clear();
root->siblingNode=NULL;
unsigned int sizeDatabase=database.size();
for(int i=0;i<sizeDatabase;i++)
{
vector<string> trans=database[i];//
vector<string> transTemp;
SortItem(trans,transTemp,large1);// large1 , transTemp
/*
// trans transTemp
int da=trans.size();
cout<<"trans:";
for(int i=0;i<da;i++)cout<<trans[i]<<" ";
cout<<endl<<"transTemp:";
da=transTemp.size();
for(int i=0;i<da;i++)cout<<transTemp[i]<<" ";
cout<<endl;
//system("pause");
*/
vector<FPNode *> childNode=root->childNode;//
//cout<<" "<<childNode.size()<<" "<<endl;
if(childNode.size()==0)
{// , 。 root 。
AddNode(root,transTemp,0,large1);//cout<<(root->childNode).size()<<" "<<endl;
}
else
{//
unsigned int sizeTrans=transTemp.size();
FPNode *pt=NULL;
for(int i=0;i<sizeTrans;i++)
{// transTemp[i]
string temp=transTemp[i];
unsigned int sizeChild=childNode.size();
int j=0;
for(j=0;j<sizeChild;j++)
{
if(temp==(childNode[j])->nodeName)
{
pt=childNode[j];
(pt->count)++;//transTemp[i] , 1
//cout<<" transTemp[i] "<<pt->nodeName<<endl;
childNode=pt->childNode;
break;
}
}
if(j==sizeChild)
{
if(pt==NULL)
{//transTemp[i] , i ,
AddNode(root,transTemp,i,large1);
}
else
{//transTemp[i] , ,
AddNode(pt,transTemp,i,large1);// i+1, !
}
break;
}
}
}
}
return 0;
}
/////////////////////////////////////////////////////////////////
// large1 conditional pattern base 。 。
void CreateConditionalPatterBase(vector<FPNode> &large1,map<string,vector<vector<string> > > &cpbMap)
{//if(large1.empty()){cout<<"large1 is empty"<<endl;}else{cout<<"large1 is not empty"<<endl;}
vector<string> condition;
int sizeLarge=large1.size();
//large1 ( , , ),
for(int i=sizeLarge-1;i>0;i--)
{
FPNode *sibling=(large1[i]).siblingNode;
//if(sibling==NULL)cout<<"*****"<<i<<endl;system("pause");
vector<vector<string> > pathSet;
pathSet.clear();
while(sibling!=NULL)
{
unsigned int count=sibling->count;
FPNode *parent=sibling->parentNode;
vector<string> path;
path.clear();
while(parent!=NULL)
{// , path
if(parent->parentNode!=NULL)
{//parent , parent ,
path.push_back(parent->nodeName);
parent=parent->parentNode;
}
else
{
break;
}
}
if(!(path.empty()))
{
for(int j=0;j<count;j++)
{
pathSet.push_back(path);
}
}
sibling=sibling->siblingNode;
}
if(pathSet.size()!=0)
{
cpbMap.insert(pair<string,vector<vector<string> > >((large1[i]).nodeName,pathSet));
}
}
}
//////////////////////////////////////////////////////////
void DestroyTree(FPNode *root)
{
if(root!=NULL)
{
vector<FPNode *> childNode=root->childNode;
unsigned int size=childNode.size();
for(int i=0;i<size;i++)
{
DestroyTree(childNode[i]);
}
delete root;
}
root=NULL;
}
// FP
void OutputFPTree(FPNode *root)
{
if(root==NULL)
{//cout<<"root is null"<<endl;
return;
}
//cout<<"root not null"<<endl;
vector<FPNode*> childNode=root->childNode;
cout<<"{"<<root->nodeName<<","<<root->count<<","<<childNode.size();
if(childNode.size()!=0)
{
cout<<"} :"<<endl;
for(int i=0;i<childNode.size();i++)
{
OutputFPTree(childNode[i]);
}
}
else
{
cout<<"} "<<endl;
}
}
// FP , FP
void CreateCpbFPtree(map<string,vector<vector<string> > > &cpbMap,map<string,FPNode *> &cpbFPTreeMap,unsigned int minSupport)
{
map<string,vector<vector<string> > >::iterator cpbMapIt;
for(cpbMapIt=cpbMap.begin();cpbMapIt!=cpbMap.end();cpbMapIt++)
{
string nodeName=cpbMapIt->first;
vector<vector<string> > pathSet=cpbMapIt->second;
vector<FPNode> cpbLarge;
// nodeName pathSet cpbLarge
CreateItemset(pathSet,cpbLarge,minSupport);
//
//cout<<"
"<<nodeName<<" "<<endl;
//OutputLargeItemset(cpbLarge,1);
//if(pathSet.empty()){cout<<"pathSet is null"<<endl;}else{cout<<"pathSet not null"<<endl;}
//if(cpbLarge.empty()){cout<<"cpbLarge is null"<<endl;}else{cout<<"cpbLarge not null"<<endl;system("pause");}
FPNode *cpbRoot=NULL;
if(!(pathSet.empty())&&!(cpbLarge.empty()))
{
CreateFPTree(pathSet,cpbLarge,&cpbRoot);// nodeName FP
//cout<<"
"<<nodeName<<" FP "<<endl;
//OutputFPTree(cpbRoot);
}
if(cpbRoot!=NULL)
{//
vector<FPNode *> *childNode=&(cpbRoot->childNode);
vector<FPNode *>::iterator childIt=(*childNode).begin();
unsigned int size=(*childNode).size();
for(int i=0;i<size;i++)
{
if((*childIt)->count< minSupport)
{//
//cout<<endl<<(*childIt)->nodeName<<" "<<(*childIt)->count<<" EEEEEEEEEEEERROR"<<endl;system("pause");
DestroyTree(*childIt);
childIt=(*childNode).erase(childIt);//cout<<endl<<(*childIt)->nodeName<<" "<<(*childIt)->count<<" EEEEEEEEEEEERROR"<<endl;system("pause");
}
else
{
childIt++;
}
}
if((*childNode).size()!=0)
{// 0, FP
cpbFPTreeMap.insert(pair<string,FPNode*>(nodeName,cpbRoot));
}
}
}
}
//////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
// FP , pathSet
void CreatePath(FPNode *root,vector<list<FPNode> > &pathSet)
{
//vector<list<FPNode> > pathSet=new vector<list<FPNode> >();
if(root!=NULL)
{
if((root->childNode).empty())
{//root
list<FPNode> path;
if(root->parentNode!=NULL)
{//
path.push_back(*root);//cout<<"root!=NULL"<<"--"<<root->nodeName<<" "<<root->count<<" "<<(root->childNode).size()<<endl;
}
if(path.size()!=0)
{
pathSet.push_back(path);
}
}
else
{//root
vector<FPNode *> childNode=root->childNode;
unsigned int size=childNode.size();
if(size>1)
{//root
for(int i=0;i<size;i++)
{
unsigned int count=childNode[i]->count;
CreatePath(childNode[i],pathSet);
unsigned int cnt=(childNode[i]->childNode).size();
if(cnt==0)
{
cnt=1;
}
if(root->parentNode!=NULL)
{
unsigned int sizePathSet=pathSet.size();
for(int j=sizePathSet-cnt;j<sizePathSet;j++)
{//cout<<"size>1"<<"--"<<root->nodeName<<" "<<root->count<<" "<<(root->childNode).size()<<endl;
list<FPNode> *lis=&(pathSet[j]);
FPNode *node=new FPNode;
node->nodeName=root->nodeName;
node->count=count;
node->parentNode=NULL;
lis->push_front(*node);
}
}
}
}
else
{//root
for(int i=0;i<size;i++)
{
unsigned int count=childNode[i]->count;
CreatePath(childNode[i],pathSet);
if(root->parentNode!=NULL)
{
unsigned int sizePathSet=pathSet.size();
for(int j=sizePathSet-1;j<sizePathSet;j++)
{//cout<<"size==1"<<"--"<<root->nodeName<<" "<<root->count<<" "<<(root->childNode).size()<<endl;
list<FPNode> *lis=&(pathSet[j]);
lis->push_front(*root);
}
}
}
}
}
}
//return pathSet;
}
//
void OutputPathSet(vector<list<FPNode> > &pathSet)
{
vector<list<FPNode> >::iterator pathSetIt;
for(pathSetIt=pathSet.begin();pathSetIt!=pathSet.end();pathSetIt++)
{
list<FPNode> path=*pathSetIt;
list<FPNode>::iterator pathIt;
for(pathIt=path.begin();pathIt!=path.end();pathIt++)
{
cout<<(*pathIt).nodeName<<" ";
}
cout<<endl<<endl;
}
}
// path fp,k path.size()-1
map<list<string>,int>* CreateFP(list<FPNode> &path)//list<FPNode>::reverse_iterator pathIt,int k,vector<vector<string> > &fp)
{
if(path.size()>0)
{
// path
FPNode *temp=&(path.front());
if(!temp)
{
return NULL;
}
list<string> lis;
string start=temp->nodeName;
lis.push_back(start);
map<list<string>,int> *fp=new map<list<string>,int>;
if(!fp)
{
return NULL;
}
fp->insert(pair<list<string>,int>(lis,temp->count));
path.pop_front();
map<list<string>,int> *others=CreateFP(path);
if(others)
{
map<list<string>,int>::iterator othersIt=others->begin();
for(;othersIt!=others->end();othersIt++)
{
fp->insert(pair<list<string>,int>(othersIt->first,othersIt->second));
const list<string> *li=&(othersIt->first);
list<string> tmp;
list<string>::const_iterator liIt=li->begin();
while(liIt!=li->end())
{
tmp.push_back(*liIt);
liIt++;
}
tmp.push_front(start);
fp->insert(pair<list<string>,int>(tmp,othersIt->second));
}
}
return fp;
}
return NULL;
}
// FP ,
void ObtainFrequenPattern(map<string,FPNode *> &cpbFPTreeMap,map<string,map<list<string>,int>* > &frequentPatternMap)
{//if(cpbFPTreeMap.empty()){cout<<"cpbFPTreeMap is empty"<<endl;}
map<string,FPNode *>::iterator cpbTreeIt;
for(cpbTreeIt=cpbFPTreeMap.begin();cpbTreeIt!=cpbFPTreeMap.end();cpbTreeIt++)
{
string nodeName=cpbTreeIt->first;
FPNode *cpbRoot=cpbTreeIt->second;
// nodeName pathSet
vector<list<FPNode> > pathSet;
list<FPNode> path;
// FP
// cout<<endl<<nodeName<<" FP :"<<endl;
// OutputFPTree(cpbRoot);
CreatePath(cpbRoot,pathSet);
//
// cout<<endl<<nodeName<<" :"<<endl;
// OutputPathSet(pathSet);system("pause");
vector<list<FPNode> >::iterator pathSetIt;
for(pathSetIt=pathSet.begin();pathSetIt!=pathSet.end();pathSetIt++)
{
list<FPNode> pathTemp=*pathSetIt;
// , fp
map<list<string>,int>* fp=NULL;
fp=CreateFP(pathTemp);
// cout<<"CreateFP finish"<<endl;
if(fp==NULL)
{
// cout<<"fp is null"<<endl;
continue;
}
map<string,map<list<string>,int>* >::iterator fpmIt;
fpmIt=frequentPatternMap.find(nodeName);
if(fpmIt==frequentPatternMap.end())
{// nodeName , fp
frequentPatternMap.insert(pair<string,map<list<string>,int>* >(nodeName,fp));
}
else
{
map<list<string>,int>::iterator fpIt=fp->begin();
unsigned int sizeFp=fp->size();
map<list<string>,int>* pt=fpmIt->second;
map<list<string>,int>::iterator ptIt=pt->begin();
unsigned int sizePt=pt->size();
for(;fpIt!=fp->end();++fpIt)
{// fp list pt list
bool flag=true;
int j=0;
for(ptIt=pt->begin(),j=0;j<sizePt&&ptIt!=pt->end();++j,++ptIt)
{
list<string>::const_iterator t1It=(fpIt->first).begin();
unsigned int sizeT1=(fpIt->first).size();
list<string>::const_iterator t2It=(ptIt->first).begin();
unsigned int sizeT2=(ptIt->first).size();
if(sizeT1!=sizeT2)
{
flag=false;
continue;
}
for(int k=0;k<sizeT1;k++)
{
if(*t1It!=*t2It)
{
flag=false;
break;
}
t1It++;
t2It++;
if(k==sizeT1-1)
{
flag=true;
}
}
if(flag==true)
{
break;
}
}
if(flag==true)
{
ptIt->second+=fpIt->second;
}
else
{
pt->insert(pair<list<string>,int>(fpIt->first,fpIt->second));
}
}
}
}
}
}
//
void OutputFrequentPattern(map<string,map<list<string>,int>* > &frequentPatternMap,unsigned int minSupport)
{//if(frequentPatternMap.empty()){cout<<"frequentPatternMap is empty"<<endl;}
map<string,map<list<string>,int>* >::iterator it;
int geshu=0;
for(it=frequentPatternMap.begin();it!=frequentPatternMap.end();it++)
{geshu++;
string nodeName=it->first;
map<list<string>,int>* fpSet=it->second;
unsigned int size=fpSet->size();
map<list<string>,int>::iterator fpSetIt=fpSet->begin();
cout<<nodeName<<":"<<endl;
for(int i=0;i<size;++i,++fpSetIt)
{
const list<string> *fp=&(fpSetIt->first);
unsigned int cnt=fpSetIt->second;
if(cnt>=minSupport)
{
unsigned int len=fp->size();
list<string>::const_iterator fpIt=fp->begin();
cout<<"{ ";
for(int j=0;j<len;j++,++fpIt)
{
cout<<*fpIt<<" ";
}
cout<<nodeName<<" }"<<endl;
}
}
}cout<<geshu<<"---";
}
//
void OutputConditionalPatterBase(map<string,vector<vector<string> > > &cpbMap)
{
map<string,vector<vector<string> > >::iterator cpbMapIt;
for(cpbMapIt=cpbMap.begin();cpbMapIt!=cpbMap.end();cpbMapIt++)
{
string nodeName=cpbMapIt->first;
vector<vector<string> > cpbSet=cpbMapIt->second;
vector<vector<string> >::iterator cpbSetIt;
cout<<nodeName<<" "<<cpbSet.size()<<" :"<<endl;
for(cpbSetIt=cpbSet.begin();cpbSetIt!=cpbSet.end();cpbSetIt++)
{
vector<string> cpb=*cpbSetIt;
vector<string>::iterator cpbIt;
for(cpbIt=cpb.begin();cpbIt!=cpb.end();cpbIt++)
{
cout<<*cpbIt<<" ";
}
cout<<endl;
}
}
}
// FP
void OutputCpbFPTree(map<string,FPNode *> &cpbFPTreeMap)
{
map<string,FPNode *>::iterator cpbFPTreeMapIt;
for(cpbFPTreeMapIt=cpbFPTreeMap.begin();cpbFPTreeMapIt!=cpbFPTreeMap.end();cpbFPTreeMapIt++)
{
string nodeName=cpbFPTreeMapIt->first;
FPNode *root=cpbFPTreeMapIt->second;
cout<<nodeName<<" FP :"<<endl;
OutputFPTree(root);
}
}
int main()
{
char *fileName="retail.dat";
int minSupport=6171;//
clock_t start=clock();
vector<vector<string> > database;//
ObtainDatabase(database,fileName);
vector<FPNode> large1;
CreateItemset(database,large1,minSupport);
int k=1;
vector<FPNode> largeTemp=large1;
//
OutputLargeItemset(largeTemp,k);
FPNode *root=NULL;
// FPTree
CreateFPTree(database,large1,&root);
// FP
//OutputFPTree(root);
//if(root==NULL)cout<<"root is null"<<endl;
map<string,vector<vector<string> > > cpbMap;
// large1 conditional pattern base 。 。
CreateConditionalPatterBase(large1,cpbMap);
//
//OutputConditionalPatterBase(cpbMap);
//if(cpbMap.empty()){cout<<"cpbMap is empty"<<endl;}///////////////////////////////////////////////////////////////////////////////////
map<string,FPNode *> cpbFPTreeMap;
// FP , FP
CreateCpbFPtree(cpbMap,cpbFPTreeMap,minSupport);
// FP
//OutputCpbFPTree(cpbFPTreeMap);
map<string,map<list<string>,int>* > frequentPatternMap;
// FP ,
ObtainFrequenPattern(cpbFPTreeMap,frequentPatternMap);
//
OutputFrequentPattern(frequentPatternMap,minSupport);
clock_t end=clock();
cout<<"Finish! :"<<(end-start)<<"ms"<<endl;
system("pause");
}
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
【Codility Lesson3】FrogJmpA small frog wants to get to the other side of the road. The frog is currently located at position X and wants to get to...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.