spCAR: Bayesian spatial CAR model for Gaussian/Poisson regression

Contents

Main function for Bayesian spatial CAR model via MCMC runs

function [] = mainProCAR(ID)

sumProOnly = 1;  if sumProOnly == 1; sumProCAR(); error('stop here'); end

% MCMC setup
verbose = 0;
mypath = './';
niter = 2e4; burn = 15e3;
ch0 = str2double(num2str(ID)); ch = ch0;
Nvar = 7; Nch = 3;
nmodel = ceil(ch/(Nvar*Nch));
ch = ch - (nmodel-1)*(Nvar*Nch);
nvar = ceil(ch/Nch);
ch = ch - (nvar-1)*Nch;

% data input and initialize parameters
rng('default'); rng(ch0*8);
[x, E] = readDataCAR(nvar);

% run MCMC, store the results
xall = nan(niter-burn, E.p+2);
etaMean = zeros(1, E.N);
Ls = nan(1, niter-burn);
eta_a = 0;
batchLen = 50; %min(50, tot);
batchNum = 0;
batchTot = niter/batchLen;
eta_rates = zeros(1, batchTot);

tic
for i = 1:niter
    if verbose==1; fprintf('%6d', i); if(~mod(i,20)); fprintf('\n'); end; end

    % -------------- update fixed-effects \beta
    V = E.X'*(E.M - x.gamma*E.W)*x.invtau2;
    Lo = V*E.X + E.prebeta;
    Lo = chol(Lo, 'lower');
    x.beta = Lo'\( randn([E.p,1]) + Lo\(V*x.eta + E.meanbeta) );
    mu = E.X*x.beta;
    u = x.eta - mu;

    % -------------- update variation \tau2
    x.invtau2  = gamrnd( E.alphatau2 + 0.5*E.N, 1/(E.invbetatau2 + 0.5*u'*(E.M - x.gamma*E.W)*u) );

    % -------------- update spatial dependence \gamma
    P = E.loglike0 + E.gammas*(u'*E.W*u)*(0.5*x.invtau2);
    P = exp(P-max(P))/sum(exp(P-max(P)));
    cump = cumsum([0 P(1:(end-1))]);
    u1 = rand(1); i0 = sum(u1 > cump);
    x.gamma = E.gammas(1);
    if(i0>1); x.gamma = E.gammas(i0-1) + E.gap/P(i0)*(u1-cump(i0)); end

    % -------------- update latent parameter \eta for Poisson model (E.fixeta==0) using tMALA
    if E.fixeta == 0
        Lo = sqrt(x.invtau2) * chol(E.M - x.gamma*E.W, 'lower');
        v = Lo'*u;
        grad = E.Y - E.offset.*exp(x.eta); if numel(grad>E.H)>0; grad(grad>E.H) = E.H; end
        grad = Lo\grad - v;
        v1 = v + 0.5*E.h*grad + E.h_sqrt*randn([E.N, 1]);
        eta1 = mu + Lo'\v1;
        grad1 = E.Y - E.offset.*exp(eta1); if numel(grad1>E.H)>0; grad1(grad1>E.H) = E.H; end
        grad1 = Lo\grad1 - v1;
        P = sum(E.Y.*eta1 - E.offset.*exp(eta1) - 0.5*v1.^2 + 0.5/E.h*(v1 - v - 0.5*E.h*grad).^2) ...
            - sum(E.Y.*x.eta - E.offset.*exp(x.eta) - 0.5*v.^2 + 0.5/E.h*(v - v1 - 0.5*E.h*grad1).^2);
        u1 = log(rand(1));
        if u1 <= P; x.eta = eta1; eta_a = eta_a + 1; end
    end

    if ~mod(i, batchLen); batchNum = batchNum+1; eta_a = eta_a/batchLen; eta_rates(batchNum) = eta_a; eta_a = 0; end

    % -------------- store results
    if i > burn
        xall(i-burn,:) = [x.beta', 1/x.invtau2, x.gamma];
        etaMean = etaMean + x.eta';
        Ls(i-burn) = getLogLikCAR(E, x.beta, 1/x.invtau2, x.gamma, x.eta);
    end
end
etaMean = etaMean/(niter-burn); % for DIC calculation
runtime = toc/60;
fprintf('\n%d iterations are done with elapsed time %.4f minutes.\n', niter, runtime)
% plot(eta_rates) %target acceptance rate: 0.574
save(strcat(mypath,'out',num2str(nvar),'_',num2str(ch),'_car.mat'),'xall','Ls','runtime','eta_rates','etaMean')
end

Import data, set environmental variables, specify prior, and initialize parameter

function [x, E] = readDataCAR(nvar)
% import data, initialize parameter, set environmental variables
load('Beta_env_nooutlier_061316.mat') %X, Y, W
E.N = size(W,1);
X = [X,Z(:,nvar)];
% X = [X,X0,Z(:,nvar)];
E.X = [ones(E.N,1), zscore(X)];  %standardized predictors
E.Y = Y;
E.p = size(E.X, 2);
E.W = W;   E.M = diag(sum(W,1)); invM = inv(E.M);
eigs = eig(sqrt(invM)*W*sqrt(invM));
lgamma = max(1/min(eigs),-1); ugamma = 1/max(eigs);
gap = 1e-3; gammas = (lgamma+gap):gap:(ugamma-gap); len = length(gammas);
% loglike0 = zeros(1,len); for i = 1:len; loglike0(i) = 0.5*sum(log(eig(M-gammas(i)*W))); end; save('loglike0.mat','loglike0')
load('loglike0.mat')
E.gap = gap; E.gammas = gammas; E.loglike0 = loglike0;

% Poisson part
E.fixeta = 1;
if E.fixeta == 0
    E.offset =  ones(E.N,1);     %no offset
    % % for MALA
    E.H = 50; E.h = 0.0012; E.h_sqrt = sqrt(E.h);
    % % used when the latent eta's are independent
    %     E.gap_eta = 1e-2;     etavec = (-8):E.gap_eta:8;
    %     len_eta = length(etavec);
    %     E.loglike0_eta = repmat(E.Y, [1,len_eta]).*repmat(etavec, [E.N,1]) - repmat(E.offset, [1,len_eta]).*repmat(exp(etavec), [E.N,1]);
    %     E.etavec = etavec; E.etavec2 = etavec.^2;
end

% set prior
E.meanbeta = zeros(E.p,1); %(E.X'*E.X)\(E.X'*E.Y);
E.prebeta = 1e-4.*eye(E.p);
E.meanbeta = E.prebeta*E.meanbeta;
meantau2 = 0.01; vartau2 = 10^2;
E.alphatau2 = 2+meantau2^2/vartau2;
E.invbetatau2 = meantau2*(E.alphatau2-1);

% set initial value
x.eta = log(E.Y); %may consider certain transformation here
if E.fixeta == 0; x.eta = log((E.Y+ 0.5*(E.Y==0))./E.offset); end
x.beta = (E.X'*E.X)\(E.X'*x.eta) .* (1 + 1/50*randn([E.p,1]));
x.invtau2 = 1/( sum((sqrt(diag(E.M)).*(x.eta-E.X*x.beta)).^2)/(E.N-E.p) );
x.gamma = 0;
end

Util function: compute working likelihood based on either Gaussian or Poisson density

function [L] = getLogLikCAR(E, beta, tau2, gamma, eta)
if E.fixeta == 1 %Gaussian model
    % Gaussian density
    V = E.M - gamma*E.W;
    eps = eta - E.X*beta;
    L = - 0.5*E.N*log(2*pi*tau2) + 0.5*log(det(V)) - 0.5/tau2.*(eps'*V*eps);
else %Poisson model
    % Poisson density
    L = sum( E.Y.*eta - E.offset.*exp(eta) );
end
end

Summarize Bayesian spaital CAR model, calculate DIC

function [] = sumProCAR()
mypath = './';
nch = 3;
niter = 5e3; burn = 0; thin = 5;
nsample = (niter-burn)/thin;
tot = nsample*nch;
Nvar = 7;
DICs = nan(Nvar, 4);

for varid = 1:Nvar
    [~, E] = readDataCAR(varid);
    npara = E.p+2;

    MCSamples = nan(nch, npara, nsample);
    etaMeanAll = zeros(1, E.N);
    LsAll = nan(nch, nsample);
    for ch = 1:nch
        load(strcat(mypath,'out',num2str(varid),'_',num2str(ch),'_car.mat'))
        vec = (burn+1):thin:size(xall, 1);
        MCSamples(ch, :, :) = xall(vec,:)';
        LsAll(ch,:) = Ls(vec);
        etaMeanAll = etaMeanAll + etaMean;
    end
    etaMeanAll = etaMeanAll/nch;
    R = psrf(MCSamples); % need psrf.m for potential scale reduction factor to check convergence
    %display(R)

    % now stack all chains:
    MCSamples = permute(MCSamples, [3,1,2]);
    MCSamples = reshape(MCSamples, [tot, npara]);

    % trace plot: ehck onvergence
    plotit = 1;
    if plotit == 1
        nam = [strcat(repmat({'\beta_{'},[1,E.p]),cellfun(@num2str,num2cell(1:E.p),'UniformOutput', false),'}'), '\tau^2', '\gamma'];
        for i=1:npara; subplot(3,6,i), plot(reshape(MCSamples(:,i), [nsample,nch])); title(nam{i},'FontSize',12); end
    end

    mat = nan(npara,5);
    for i = 1:npara
        mat(i,1) = mean(MCSamples(:,i)); mat(i,2) = std(MCSamples(:,i));
        %[lb, ub] = FindHPDset(matParas(:,i), 0.95, []);
        tmpq = quantile(MCSamples(:,i), [0.025, 0.975]);
        lb = tmpq(1); ub = tmpq(2);
        mat(i,3) = lb(1); mat(i,4) = ub(1);
    end
    mat(:,5) = (mat(:,3).*mat(:,4))>0;
    D1 = -2*mean(reshape(LsAll, [1,numel(LsAll)]));
    meanPara = mat(:,1);
    D2 = -2*getLogLikCAR(E, meanPara(1:(npara-2)), meanPara(npara-1), meanPara(npara), etaMeanAll');
    pD = D1 - D2;
    DIC = 2*D1 - D2;
    disp(num2str([D1, D2, pD, DIC]))
    save(strcat(mypath, 'paras',num2str(varid),'.mat'),'mat','DIC')
    DICs(varid,:) = [D1, D2, pD, DIC]; %display deviance, effective number of parameters and DIC
end
save(strcat(mypath,'DICs.mat'),'DICs')
end