Generating Multivariate Normal Data by Using PROC IML & R

Generating Multivariate Normal Data by Using PROC IML :
 http://analytics.ncsu.edu/sesug/2006/CC15_06.PDF
1.Generate the bivariate normal data
/* Generate the bivariate normal data */
data one;
mean1=0; *mean for y1;
mean2=10; *mean for y2;
sig1=2; *SD for y1;
sig2=5; *SD for y2;
rho=0.5; *Correlation between y1 and y2;
do i = 1 to 1000;
r1 = rannor(1245);
r2 = rannor(2923);
y1 = mean1 + sig1*r1;
y2 = mean2 + rho*sig2*r1+sqrt(sig2**2-sig2**2*rho**2)*r2;
output;
end;
keep y1 y2;
*proc print;
run;

2.Generate the multivariate normal data by using PROC IML
a.nonmacro version
beginning with the correlation matrix R and a vector of means and standard deviations
/* Generate the multivariate normal data in SAS/IML */
/* non-macro version */
data MVN_par; /* data for the parameter for the multivariate normal data */
   input r1 r2 r3 means vars;
cards;
1.0 -0.5 0.9 100 2
-0.5 1.0 -0.7 200 5
0.9 -0.7 1.0 300 10
;
proc iml;
   use MVN_par;
   read all var {r1 r2 r3} into R;
   read all var {means} into mu;
   read all var {vars} into sigma;

   p = ncol(R);
   diag_sig = diag( sigma );
   DRD = diag_sig * R * diag_sig`;
   U = half(DRD);

   do i = 1 to 1000;
      z = rannor( j(p,1,1234));
      y = mu + U` * z;
      yprime = y`;
      yall = yall // yprime;
   end;

   varnames = { y1 y2 y3 };

   create my_MVN from yall (|colname = varnames|);
   append from yall;
quit;
proc print data=my_MVN;run;

b.macro version
for any dimension multivariate normal data with variance-covariance matrix and means
%macro mvn(varcov=, means=, n=, myMVN=);
   /* arguments for the macro:
    1. covcov: data set for variance-covariance matrix
    2. means: data set for mean vector
    3. n: sample size
    4. myMVN: output data set name */
proc iml;
   use &varcov; /* read in data for variance-covariance matrix */
   read all into sigma;
   use &means; /* read in data for means */
   read all into mu;
   p = nrow(sigma); /* calculate number of variables */
   n = &n;
   l = t(half(sigma)); /* calculate cholesky root of cov matrix */
   z = normal(j(p,&n,1234)); /* generate nvars*samplesize normals */
   y = l*z; /* premultiply by cholesky root */
   yall = t(repeat(mu,1,&n)+y); /* add in the means */
   varnames = { y1 y2 y3 };
   create &myMVN from yall (|colname = varnames|);
   append from yall;
quit;
%mend mvn;
data means1;
   input x @@;
cards;
100 200 300
;
run;
data varcov1;
   input x1-x3;
cards;
4 -5 18
-5 25 -35
18 -35 100
;
run;
%mvn(varcov=varcov1, means=means1, n=1000, myMVN=my_MNV)
proc print data=my_MNV;run;

3.Test
proc univariate normal noprint data=test1; var y1 y2 y3; output out=new mean=avg1 avg2 avg3 std=std1 std2 std3 probn=prob1 prob2 prob3; run; proc print data=new; run; proc corr data=test1 noprint outp=mycorr; var y1 y2 y3; run; proc print data=mycorr; run; goptions reset=all; symbol1 value=dot cv=blue height=0.5 width=2; proc gplot data=test1; plot y1*y2 y2*y3 y1*y3; run; proc gchart data=test1; vbar y1 y2 y3; run;
 
Generate Multivariate normal data in R
1.Generate the bivariate normal data
rbivariate <- function(mean.x = 0, sd.x=2, mean.y=10, sd.y=5, r=.50, iter=1000) {
   z1 <- rnorm(1245)
   z2 <- rnorm(2923)
   x <- sqrt(1-r^2)*sd.x*z1 + r*sd.x*z2 + mean.x
   y <- sd.y*z2 + mean.y
   return(list(x,y))
}
data <- rbivariate(iter=1000)
data
mean(data[[1]])
sd(data[[1]])
mean(data[[2]])
sd(data[[2]])
plot(data[[1]],data[[2]])


2.multivariate normal data with variance-covariance matrix and means
http://web.as.uky.edu/statistics/users/viele/sta601s03/multnorm.pdf
rmultnorm<-function(n,muvec,sigmat){
# Original code written by Kert Viele
# Modified by Mark Lancaster.
# n is the number of random vectors to be generated
# mu is the mean vector, sigmat is the variance-covariance matrix
# the function returns an n by p matrix with each row being a
# random vector from a multivariate normal with mean vector muvec
# and covariance matrix sigmat
if(length(muvec)==1){
temp<-rnorm(n,muvec,sqrt(sigmat))
return(temp)
}
else{
sigeigen<-eigen(sigmat)
amat<-sigeigen$vectors%*%diag(sqrt(sigeigen$values))
temp<-matrix(rnorm(n*length(muvec),0,1),ncol=n)
temp<-(amat%*%temp)+muvec
temp<-t(temp)
return(temp)
}
}

we can get more information on how to generate multivariate normal data in R,from:
http://wiki.math.yorku.ca/index.php/PSYC_6140:_Multivariate_Normal_R_script however,there are also some packages do the same job,such as "MSBVAR"

좋은 웹페이지 즐겨찾기