"""
Rpy glm example:  Shows how to use glm (with family=binomial)
to predict the probability of a binary (yes or no) outcome.
I use it to produce probability forecasts from a weather
prediction model - this a simple toy example that illustrates
the process.

Jeff Whitaker
<Jeffrey.S.Whitaker@noaa.gov>
20030801
"""
import sys
from Numeric import *
from RandomArray import *
from MLab import *
import sys
sys.path.insert(1, "..")
from rpy import *

# specify correlation between forecast and observations.
fcstcorr = 0.5
# specify link function for glm (logit, probit or cloglog)
link = 'logit'
# specify exceedance threshold for probability forecast
thresh = 0.
# specify number of realizations.
nfcsts = 10000
# compute climatological probabaility of exceeding threshold.
climprob = r.pnorm(thresh,0.,1.)*ones(nfcsts,'f')

print 'fcstcorr = ',fcstcorr
print 'link = ',link
print 'thresh = ',thresh
print 'climo prob = ',max(climprob)
print 'nfcsts = ',nfcsts

# generate realizations from multivariate normal distribution
#                     
#                    obsvar fcstcov 
# with cov matrix =  
#                    fcstcov  fcstvar
#
# obsvar is fixed to 1, fcstcov = fcstcorr*sqrt(fcstvar).
# so only free parameter is fcstcorr.

fcstvar = fcstcorr**2
fcstcov = fcstcorr*sqrt(fcstvar)

# repeat the process 1000 times.

for nrun in range(1000):
    print 'run ',nrun

    x = multivariate_normal([0.,0.], [[fcstcov,fcstvar],[1.,fcstcov]], nfcsts)

# Now have two time series that are correlated at r=fcstcorr.
# Call one the 'observations' and one the 'forecast'
    obs = x[:,0]
    fcst = x[:,1]

# train the glm (fit the model)

    binaryobs =  obs > thresh
    d = r.data_frame(x=fcst.tolist(),y=binaryobs.tolist())
    formula = r("y ~ x")
    family = r("family=binomial(link="+link+")")
    model = with_mode(NO_CONVERSION,r.glm)(formula,family=family,data=d)
    print 'model coefficients = ',r.coef(model)

# independent data for verification

    x = multivariate_normal([0.,0.], [[fcstcov,fcstvar],[1.,fcstcov]], nfcsts)
    obs = x[:,0]
    fcst = x[:,1]

# predict the probability that fcst > thresh, using
# the glm on independent data.

    d = r.data_frame(x=fcst.tolist())
    fcstprob = array(r.predict(model, newdata=d, type="response"))

# calculate brier skill score of the forecasts.
# bss = 0: forecasts are no better than climatology
# bss = 1: forecasts are perfect.

    verif = obs > thresh
    bs = mean((fcstprob - verif)**2)
    bsclim = mean((climprob - verif)**2)
    bss = 1.-(bs/bsclim)
    print "brier skill score = ",bss
# try to delete the objects to free memory (doesn't work)
    del model
    del d
