다시 쓴 스크립트. 괜찮았으면 좋겠어요.

13064 단어 각본
#!/bin/sh





###############################################

#                                             #

#  author:lishujun                            #

#  date:2013-4-17                             #

#  use:$./testread.sh 20130425                #

#                                             #

###############################################



### load library ###

# Pull in the shared helper functions used below.
. common

# Earlier initialisation calls, kept for reference:
#init_date $1
#init_db

# Hand the first command-line argument to the library helper; the statistics
# date is then read back from $statDate (presumably set by that helper --
# confirm in the common library).
common_init_stat_date $1
stat_date=${statDate}

### define global variable ###

# Glob pattern (note the trailing "*") selecting the access logs of the day.
logFile="./logs/${stat_date}/wap_access_log.${stat_date}*"

# Output file for the extracted access details.
dataFile="./data/access_detail_${stat_date}_small"

# Comma-separated host prefixes whose requests are kept.
domain="pp.cn,sou.pp.cn,c0.pp.cn,c1.pp.cn,c2.pp.cn,c3.pp.cn"

# Alternative domain settings, kept for reference:
#domain=$2
#domain='shuqi.com,pp.cn,shuqiread.com'

# Comma-separated query-string flag names.
ucFlags="fr,ct,ac"



### functions ###



# cleanDataFile: delete the data file left over from a previous run.
#
# Globals read:
#   dataFile - path of the file to remove
# Outputs:
#   a progress message on stdout
# Returns:
#   the exit status of rm (0 when the file was removed or did not exist)
cleanDataFile()
{
        echo 'clean data files...'

        # -f : a missing file (e.g. on the first run) is not an error
        # -- : a path starting with "-" is not mistaken for an option
        # The quotes keep a path containing spaces or glob characters intact.
        rm -f -- "$dataFile"
}





# makeDataFile: read the access logs and append one record per distinct
# (user, url parts, fad) combination to $dataFile.
#
# Globals read:
#   logFile  - glob pattern of the log files handed to the reader command
#   domain   - comma-separated host prefixes; only URLs that start with one
#              of them are kept
#   ucFlags  - still passed to awk for compatibility, but the awk program
#              does not reference it (fr, ct and ac are hard-coded)
#   dataFile - output file, opened by awk in append mode
#   kcatCmd  - optional override of the log reader command
#              (default: /www/scripts/stat/kcat.sh)
#
# Input: whitespace-separated log lines; field 4 is the user id, field 6 the
# URL and field 9 the session id, each optionally wrapped in "[" "]".
#
# Output record (space separated): userid domain path fr ct ac fad
# A value that cannot be extracted is written as "-".
#
# Returns: the exit status of awk (last command of the pipeline).
makeDataFile()
{
        # $logFile is deliberately unquoted: it holds a glob pattern that the
        # shell must expand into the list of log files.
        # shellcheck disable=SC2086
        "${kcatCmd:-/www/scripts/stat/kcat.sh}" $logFile | awk \
                -v domain="$domain" \
                -v ucFlags="$ucFlags" \
                -v dataFile="$dataFile" \
        '
        # Remove every "[" and "]" from a log field.
        function extractField(s)
        {
                gsub(/\[/, "", s)
                gsub(/\]/, "", s)
                return s
        }

        # Return the digits of the first "[FAD:<digits>]" tag in text, or "-".
        function extractFad(text)
        {
                # "[FAD:" is 5 characters and the closing "]" is 1: skip 5, drop 6.
                if (match(text, /\[FAD:[0-9]+\]/) > 0)
                        return substr(text, RSTART + 5, RLENGTH - 6)
                return "-"
        }

        # Return the value of query parameter name in url, or "-" when the
        # parameter is absent or empty. The value runs up to the next "&" or
        # the end of the string.
        function queryParam(url, name,    re, skip)
        {
                re = "[&?]" name "=[^&]+"
                # The match is <separator><name>=<value>; skip separator, name and "=".
                skip = length(name) + 2
                if (match(url, re) != 0)
                        return substr(url, RSTART + skip, RLENGTH - skip)
                return "-"
        }

        # Split url into params["domain"], ["path"], ["fr"], ["ct"], ["ac"].
        function parseUrl(url, params)
        {
                # Domain: everything before the first ":" or "/".
                params["domain"] = "-"
                if (match(url, /[:\/]/) > 0)
                        params["domain"] = substr(url, 1, RSTART - 1)

                # Path: from the first "/" up to the "?" or the end of the url.
                # (A "$" inside a bracket expression is a literal dollar, not an
                # end anchor, so the end of string is handled by the greedy match.)
                params["path"] = "-"
                if (match(url, /\/[^?]*/) > 0)
                        params["path"] = substr(url, RSTART, RLENGTH)

                params["fr"] = queryParam(url, "fr")
                params["ct"] = queryParam(url, "ct")
                params["ac"] = queryParam(url, "ac")
        }

        BEGIN {
                domainCount = split(domain, domainList, ",")
        }

        {
                userid  = extractField($4)
                url     = extractField($6)
                session = extractField($9)
                fad     = extractFad($0)

                # When the user id is not a positive integer, use the session id instead.
                if (int(userid) < 1)
                        userid = session

                for (i = 1; i <= domainCount; i++)
                {
                        len = length(domainList[i])

                        # awk strings are 1-indexed; a start of 0 makes several
                        # awk implementations return one character too few.
                        if (substr(url, 1, len) != domainList[i])
                                continue

                        parseUrl(url, urlParams)
                        line = userid " " urlParams["domain"] " " urlParams["path"] " " urlParams["fr"] " " urlParams["ct"] " " urlParams["ac"] " " fad

                        # Write each distinct record only once.
                        if (!(line in seen))
                        {
                                seen[line] = 1
                                print line >> dataFile
                        }
                        break
                }
        }
        '
}





# main: print a timestamped start message, remove the old data file, rebuild
# it, then print a timestamped completion message.
main()
{
        # $(date) is left unquoted so the timestamp is word-split and echoed
        # with single spaces between its parts.
        # shellcheck disable=SC2046
        echo $(date) start...
        cleanDataFile
        makeDataFile
        # shellcheck disable=SC2046
        echo $(date) done
}



# Script entry point: run main(), which cleans the old data file and rebuilds it.

main

 

좋은 웹페이지 즐겨찾기