function [est,se,se_cond,em,art_data,aug_data]=match(Y,T,X,Z,V,All,M,BiasAdj,Weight,Weight_matrix,Var_calc,weight,SAMPLE,exact); % Guido Imbens, imbens@econ.berkeley.edu % current version, August 11th, 2003 % Y is the vector of outcomes, dimension N by 1 % T is the vector of treatments, dimension N by 1, all elements equal to 0 or 1 % X is a matrix of covariates, dimension N by Kx, to be used for matching % Z is a matrix of covariates, dimension N by Kz, to be used for bias adjustment % All is indicator: 0 for SATT (average effect for treated) % 1 for SATE (average effect for all) % 2 for SATC (average effect for controls) % M is a vector of the number of matches % BiasAdj is indicator: 0 if no bias adjustment % 1 if bias adjustment using Z % Weight is indicator: 1 if weights are equal to inverse of variances % 2 if weights are equal to mahalanobis metric % 3 if weight matrix is supplied in Weight_matrix % Weight_matrix: Kx by Kx matrix of weights, only used if Weight==2 % Var_calc is indicator: 0 if for variance calculation we assume constant % treatment effect and homoskedasticity % MV if robust variance, using MV>=1 matches for variances % range is a pair of values. Only observations with the value of the % function match_select(Y,T,X,Z) in the range (including boundary values) % are used in the estimation % weight is a vector of the same length as Y, with a set of weights % to be used in estimation. %art_data=[I,IM,IT,IY,Yc,Yt,W,IX_u,Xc_u,Xt_u,Yc_adj,Yt_adj,Tau_i]; % outcomes from the matching program % es gives the estimated average treatment effect % se gives the estimated standard error for the average treatment effect % art_data gives an artificial data set that contains all the matches constructed % this data set is on the order of N times M units if all the units are matched, % and N1 times M if only the treated are matched. The actual number can be slightly higher % if there are ties in the matches. % I: index of unit that is being matched % IM: index of unit that is used as a match % IT: treatment status for the unit % IY: actual outcome for the unit % Yc: predicted control outcome for unit % Yt: predicted treated outcome for unit % W: weight for match (1/M if no ties, otherwise smaller) % IX_u: value of X for unit that is being matched % Xc_u: value of X for control unit in match % Xt_u: value of X for treated unit in match % Yc_adj: adjusted control outcome % Yt_adj: adjusted treated outcome % Tau_i: estimated treatment effect for match % aug_data gives an augmented data set with a number of extra variables % Y: outcome % X: covariates for matching % Z: covariates for bias-adjustment % Kcount: number of times a unit is used as a match divided by the number of matches % Sig: estimate of standard deviation of potential outcome at the value for the covariates % if SATC is to be estimated the treatment indicator is reversed if All==2, T=1-T; end, % check on the number of matches, to make sure the number is within the limits % feasible given the number of observations in both groups. if All==1, M=min(M,min(sum(T),sum(1-T))); else M=min(M,sum(1-T)); end, % two slippage parameters that are used to determine whether distances are equal % distances less than ccc or cdd are interpeted as zero. ccc=0.00001; %ccc=ccc/100; %ccc=ccc/100000; cdd=0.00001; % I. set up % I.a. vector for which observations we want the average effect % iot_t is the vector with weights in the average treatment effects % iot_c is the vector of indicators for potential controls if All==1, %iot_t=ones(length(T),1); iot_t=weight; %iot_c=iot_t; iot_c=ones(length(T),1); else iot_t=T.*weight; iot_c=(1-T); end, % I.b. determine sample and covariate vector sizes [N,Kx]=size(X); [N,Kz]=size(Z); % K covariates % N observations Nt=sum(T); Nc=sum(1-T); on=ones(N,1); % I.c. normalize regressors to have mean zero and unit variance. % If the standard deviation of a variable is zero, its normalization % leads to a variable with all zeros. % The matrix AA enables one to transform the user supplied weight matrix % to take account of this transformation. % Mu_X and Sig_X keep track of the original mean and variances AA=eye(Kx); Mu_X=zeros(Kx,1); Sig_X=zeros(Kx,1); for k=1:Kx, Mu_X(k,1)=sum(X(:,k).*weight)/sum(weight); eps=X(:,k)-Mu_X(k,1); Sig_X(k,1)=sqrt(sum(X(:,k).*X(:,k).*weight)/sum(weight)-Mu_X(k,1)^2); %Sig_X(k,1)=Sig_X(k,1)*sqrt(sum(weight)/(sum(weight)-1)); Sig_X(k,1)=Sig_X(k,1)*sqrt(N/(N-1)); X(:,k)=eps/Sig_X(k,1); AA(k,k)=Sig_X(k,1); end Sig_X; [Nv,Mv]=size(V); Mu_V=zeros(Mv,1); Sig_V=zeros(Mv,1); for j=1:Mv, Mu_V(j,1)=V(:,j)'*weight/sum(weight); dv=V(:,j)-Mu_V(j,1); sv=sqrt(sum(V(:,j).*V(:,j).*weight)/sum(weight)-Mu_V(j,1)^2); sv=sv*sqrt(N/(N-1)); Sig_V(j,1)=sv; %V(:,j)=dv/sv; end, Sig_V.*Sig_V; % I.d. define weight matrix for metric, taking into account normalization of % regressors. % If the eigenvalues of the regressors are too close to zero the Mahalanobis metric % is not used and we revert back to the default inverse of variances. if Weight==1; Weight_matrix=eye(Kx); end, if Weight==2 if min(eig(X'*X/N))>0.0000001, Weight_matrix=inv(X'*X/N); %Weight_matrix=inv(X'*X/N)+0.1*eye(Kx); %Weight_matrix=eye(Kx); else Weight_matrix=eye(Kx); end, end, if Weight==3, size(AA); size(Weight_matrix); Weight_matrix=AA*Weight_matrix*AA; end, [Mu_X,Sig_X]; [mean(X)',std(X)']; [sum(X);min(X);max(X)]; size(X); %E=X(:,1).*X(:,1); %[sum(E),min(E),max(E)] %X'*X %X'*X/N %Weight_matrix if exact==1, Weight_matrix=[Weight_matrix,zeros(Kx,Mv);zeros(Mv,Kx),1000*inv(diag(Sig_V.*Sig_V))]; X=[X,V]; Mu_X=[Mu_X;zeros(length(Mu_V),1)]; Sig_X=[Sig_X;ones(length(Sig_V),1)]; end, [Nx,Kx]=size(X); sort(eig(Weight_matrix)); min(eig(Weight_matrix)); Weight_matrix; %pause %if Weight==2, %'pauze' %pause %end if min(eig(Weight_matrix))1, Dist=sum((DX.*DX)')'; else Dist=DX.*DX; end % Dist distance to observation to be matched % is N by 1 vector POTMAT=(T==1-TREATi); % set of potential matches (all observations with other treatment) XPOT=X(POTMAT,:); % X's for potential matches DistPot=Dist(POTMAT,1); TTPotMat=TT(POTMAT,1); i; [DistPot(1:4,1),TTPotMat(1:4,1)]; %pause weightPot=weight(POTMAT,1); [S,L]=sort(DistPot); % sorted distance of potential matches weightPot_sort=weightPot(L,1); weightPot_sum=cumsum(weightPot_sort); tt=(1:length(weightPot_sum))'; MMM=min(tt(weightPot_sum>=M)); Distmax=S(MMM,1); % distance at last match ACTMAT=(POTMAT & Dist<=(Distmax+ccc)); % selection of actual matches ACTDIST=Dist(ACTMAT,1); % distance to actual matches Kcount=Kcount+weight(i,1)*weight.*ACTMAT/sum(ACTMAT.*weight); % counts how many times each observation is matched. KKcount=KKcount+weight(i,1)*weight.*weight.*ACTMAT/(sum(ACTMAT.*weight)*sum(ACTMAT.*weight)); % counts how many times each observation is matched. Mi=sum(weight.*ACTMAT); % counts number of matches for observation i % Unless there are ties this should equal M MMi(i,1)=Mi; Wi=weight(ACTMAT,1); ymat=Y(ACTMAT,1)'*Wi/Mi; % mean of Y's for actual matches xmat=(X(ACTMAT,:)'*Wi/Mi)'; % mean of X's for actual matches zmat=(Z(ACTMAT,:)'*Wi/Mi)'; % mean of Z's for actual matches YCAUS(i,1)=TREATi*(yy-ymat)+(1-TREATi)*(ymat-yy); % estimate causal effect on y for observation i XCAUS(i,:)=TREATi*(xx-xmat)+(1-TREATi)*(xmat-xx); % difference between x and actual matches for observation i ZCAUS(i,:)=TREATi*(zz-zmat)+(1-TREATi)*(zmat-zz); % difference between x and actual matches for observation i % collect results I=[I;i*ones(sum(ACTMAT),1)]; DD=[DD;ACTDIST]; IM=[IM;INN(ACTMAT,1)]; IT=[IT;TREATi*ones(sum(ACTMAT),1)]; IX=[IX;ones(sum(ACTMAT),1)*xx]; IZ=[IZ;ones(sum(ACTMAT),1)*zz]; IY=[IY;ones(sum(ACTMAT),1)*yy]; W=[W;weight(i,1)*Wi/Mi]; % weight for matches ADist=[ADist;ACTDIST]; WWi=[WWi;Wi]; if TREATi==1, Xt=[Xt;ones(sum(ACTMAT),1)*xx]; % covariates for treated Zt=[Zt;ones(sum(ACTMAT),1)*zz]; % covariates for treated Yt=[Yt;yy*ones(sum(ACTMAT),1)]; % outcome for treated Xc=[Xc;X(ACTMAT,:)]; % covariate for matches Zc=[Zc;Z(ACTMAT,:)]; % covariate for matches Yc=[Yc;Y(ACTMAT,1)]; % outcome for matches else Xc=[Xc;ones(sum(ACTMAT),1)*xx]; % covariates for controls Zc=[Zc;ones(sum(ACTMAT),1)*zz]; % covariates for controls Yc=[Yc;ones(sum(ACTMAT),1)*yy]; % outcome for controls Xt=[Xt;X(ACTMAT,:)]; % covariate for matches Zt=[Zt;Z(ACTMAT,:)]; % covariate for matches Yt=[Yt;Y(ACTMAT,1)]; % outcome for matches end, end, end, % transform matched covariates back for artificial data set Xt_u=Xt; Xc_u=Xc; IX_u=IX; for k=1:Kx, Xt_u(:,k)=Mu_X(k,1)+Sig_X(k,1)*Xt_u(:,k); Xc_u(:,k)=Mu_X(k,1)+Sig_X(k,1)*Xc_u(:,k); IX_u(:,k)=Mu_X(k,1)+Sig_X(k,1)*IX_u(:,k); end, if All~=1, I=I(IT==1,:); IM=IM(IT==1,:); IT=IT(IT==1,:); IY=IY(IT==1,:); Yc=Yc(IT==1,:); Yt=Yt(IT==1,:); W=W(IT==1,:); ADist=ADist(IT==1,:); WWi=WWi(IT==1,:); IX_u=IX_u(IT==1,:); Xc_u=Xc_u(IT==1,:); Xt_u=Xt_u(IT==1,:); Xc=Xc(IT==1,:); Xt=Xt(IT==1,:); Zc=Zc(IT==1,:); Zt=Zt(IT==1,:); IZ=IZ(IT==1,:); end, % III. Regression of outcome on covariates for matches % III.a. Treated if All==1, NNt=length(Z); % number of observations ZZt=[ones(NNt,1),Z]; % add intercept [Nx,Kx]=size(ZZt); % number of covariates xw=ZZt.*(sqrt(T.*Kcount)*ones(1,Kx)); wout=inv(xw'*xw+eye(Kx)*cdd*(min(eig(xw'*xw))<=cdd)*max(std(xw)))*(xw'*(Y.*sqrt(T.*Kcount))); min(eig(xw'*xw)); max(eig(xw'*xw)); [NW,KW]=size(wout); Alphat=wout(2:NW,1); else Alphat=zeros(Kz,1); end % III.b. Controls NNc=length(Z); ZZc=[ones(NNc,1),Z]; [Nx,Kx]=size(ZZc); xw=ZZc.*(sqrt((1-T).*Kcount)*ones(1,Kx)); wout=inv(xw'*xw+eye(Kx)*cdd*(min(eig(xw'*xw))<=cdd)*max(std(xw)))*(xw'*(Y.*sqrt((1-T).*Kcount))); [NW,KW]=size(wout); Alphac=wout(2:NW,1); %size(Alphat) %size(Alphac) Alpha=[Alphat,Alphac]; % III.c. adjust matched outcomes using regression adjustment for bias adjusted matching estimator SCAUS=YCAUS-T.*(ZCAUS*Alphac)-(1-T).*(ZCAUS*Alphat); Yc_adj=Yc+BiasAdj*(IZ-Zc)*Alphac; % adjusted treated outcome Yt_adj=Yt+BiasAdj*(IZ-Zt)*Alphat; % adjusted control outcome Tau_i=Yt_adj-Yc_adj; art_data=[I,IM,IT,DD,IY,Yc,Yt,W,WWi,ADist,IX_u,Xc_u,Xt_u,Yc_adj,Yt_adj,Tau_i]; % III. If conditional variance is needed, initialize variance vector % and loop through all observations [Nx,Kx]=size(X); ww=chol(Weight_matrix); NN=(1:N)'; if Var_calc>0, Sig=zeros(N,1); sd=std(Y); % overall standard deviation of outcome for i=1:N, TREATi=T(i,1); % treatment indicator observation to be matched xx=X(i,:); % covariate value for observation to be matched POTMAT=T==TREATi; % potential matches are all observations with POTMAT(i,1)=0; % the same treatment value weightPOT=weight(POTMAT==1,1); DX=(X-ones(Nx,1)*xx)*(ww'); if Kx>1, Dist=sum((DX.*DX)')'; else Dist=DX.*DX; end % distance to observation to be matched DistPot=Dist(POTMAT,1); % Distance vector only for potential matches [S,L]=sort(DistPot); % sorted distance of potential matches [S,L]; [DistPot,weightPOT]; weightPOT_sort=weightPOT(L,1); [S,L,weightPOT_sort]; weightPOT_sum=cumsum(weightPOT_sort); weight(i,1); tt=(1:length(weightPOT_sum))'; MMM=min(tt(weightPOT_sum+weight(i,1)-1>=Var_calc)); MMM=min(tt(weightPOT_sum>=Var_calc)); [Var_calc,weightPOT_sum(MMM,1)+weight(i,1)-1]; MMM=min(MMM,length(S)); Distmax=S(MMM,1); % distance of Var_calc-th closest match ACTMAT=(POTMAT & Dist<=Distmax+ccc); [weight,ACTMAT,Dist]; [i,Distmax+ccc]; sum(ACTMAT); % indicator for actual matches, that is % all potential matches closer than, or as close % as the Var_calc-th closest Yactmat=[Y(i,1);Y(ACTMAT,1)]; weightactmat=[weight(i,1);weight(ACTMAT,1)]; sum(weightactmat); fm=Yactmat'*weightactmat/sum(weightactmat); sm=sum(Yactmat.*Yactmat.*weightactmat)/sum(weightactmat); sigsig=(sm-fm*fm)*sum(weightactmat)/(sum(weightactmat)-1); if sigsig<0, sigsig; sum(weightactmat); sm; fm; sm-fm*fm; %pause end Sig(i,1)=sqrt(sigsig); % standard deviation of actual matches end, Sigs=Sig.*Sig; % variance estimate end, % matching estimator est=W'*Tau_i/sum(W); est_t=sum((iot_t.*T+iot_c.*Kcount.*T).*Y)/sum(iot_t.*T+iot_c.*Kcount.*T); est_c=sum((iot_t.*(1-T)+iot_c.*Kcount.*(1-T)).*Y)/sum(iot_t.*(1-T)+iot_c.*Kcount.*(1-T)); %est=est_t-est_c; %pause if Var_calc==0, eps=Tau_i-est; eps_sq=eps.*eps; Sigs=0.5*ones(N,1)*(eps_sq'*W)/sum(W); sss=sqrt(Sigs(1,1)); end, [Sigs,sqrt(Sigs),iot_t,iot_c,Kcount,KKcount]; SN=sum(iot_t); var_sample=sum((Sigs.*(iot_t+iot_c.*Kcount).*(iot_t+iot_c.*Kcount))/(SN*SN)); if All==1, var_pop=sum((Sigs.*(iot_c.*Kcount.*Kcount+2*iot_c.*Kcount-iot_c.*KKcount))/(SN*SN)); else var_pop=sum((Sigs.*(iot_c.*Kcount.*Kcount-iot_c.*KKcount))/(SN*SN)); end %var_pop=0; if BiasAdj==1, size(iot_t); size(SCAUS); dvar_pop=sum(iot_t.*(SCAUS-est).*(SCAUS-est))/(SN*SN); else size(iot_t); size(YCAUS); dvar_pop=sum(iot_t.*(YCAUS-est).*(YCAUS-est))/(SN*SN); end var_pop=var_pop+dvar_pop; if SAMPLE==1, var=var_sample; else var=max(var_sample,var_pop); var=var_pop; end var_cond=max(var_sample,var_pop)-var_sample; se=sqrt(var); se_cond=sqrt(var_cond); [sqrt(var_sample),sqrt(var_pop)]; Sig=sqrt(Sigs); aug_data=[Y,T,X,Z,Kcount,Sig,weight]; if All==2, est=-est; end, em=[0;0]; exact; if exact==1, Vt_u=Xt_u(:,Kx-Mv+1:Kx); Vc_u=Xc_u(:,Kx-Mv+1:Kx); Vdif=abs(Vt_u-Vc_u); size(Vdif); Mv; if Mv>1, Vdif=sum(Vdif')'; end, em(1,1)=length(Vdif); em(2,1)=sum(Vdif>0.000001); end