;;;; ;;;; ;;;; Programs for the study of the spline smoothing code. ;;;; ;;;; 7 Apr 97 ... Sun version work. ;;;; 16 Dec 91 ... Created. (require "splsm") #| Computing the 'projection' matrix for spline smoothers... ;--- Build S_{\la} for n=6 (print-matrix (splsm-matrix (iseq 10) 3) t :float-digits 2) ;--- Build S_{\la} for n=100 (def x (rseq 0 100 100)) (def df 6) (def s (splsm-matrix x df)) (sum (get-row s 20)) ;--- Plot the diagonals S_ii (def p (plot-points (iseq (length x)) (diagonal s))) (send p :abline (/ df (length x)) 0) ;--- Plot smoothing weights (negative weights!) (def p (plot-lines x (get-row s 0))) (send p :add-lines x (get-row s 10) :color 'red) (send p :add-lines x (get-row s 20) :color 'blue) (send p :add-lines x (get-row s 30) :color 'red) (send p :add-lines x (get-row s 40) :color 'blue) (send p :add-lines x (get-row s 50) :color 'red) ;--- Compare various forms for counting df (trace s) (trace (matmult s (transpose s))) ; always less than (trace s) ;--- Compare MSE to that of polynomial (def x (rseq 0 (* 2 pi) 100)) (def y (+ (sin x) (* 0.25 (normal-rand (length x))))) (def p (plot-points x y)) (send p :add-lines x (sin x) :color 'magenta) (send p :add-lines x (splsm x y :df 12) :color 'red) (send p :add-lines x (splsm x y :df 6) :color 'red) (send p :add-lines x (poly-fit x y 6) :color 'blue) (def df '(6 6.5 7 7.5 8 8.5 9 9.5 10 10.5 11 11.5 12)) ; n = 100 (def mse (mapcar #'(lambda (df) (splsm-mse x y (sin x) df)) df)) (def p (plot-points df mse :variable-labels '("DF" "MSE"))) (def msep (mapcar #'(lambda (df) (poly-mse x y (sin x) df)) (iseq 4 12))) (send p :add-points (iseq 4 12) msep :symbol 'cross) (send p :adjust-to-data) ;--- Now make the target function 'rougher' with jump (def x (rseq .01 .99 100)) (def f (+ (sin (* 2 pi x)) (* 5 (normal-cdf (/ (- x (mean x)) .05))))) (def y (+ f (* 0.25 (normal-rand (length x))))) (def p (plot-points x y)) (send p :add-lines x f :color 'magenta) (send p :add-lines x (splsm x y :df 12) :color 'red) (send p :add-lines x (splsm x y :df 6) :color 'red) (send p :add-lines x (poly-fit x y 12) :color 'blue) (def df (rseq 12 18 13)) ; n = 100 (def mse (mapcar #'(lambda (df) (splsm-mse x y f df)) df)) (def p (plot-points df mse :variable-labels '("DF" "MSE"))) (def msep (mapcar #'(lambda (df) (poly-mse x y f df)) (iseq 12 17))) (send p :add-points (iseq 12 17) msep :symbol 'cross) (send p :adjust-to-data) ;--- Use CVSS to attempt to find best lambda (splsm-cvss x y 10) (def df (iseq 10 14)) (def cv (mapcar #'(lambda (df) (splsm-cvss x y df)) df )) (def mse (mapcar #'(lambda (df) (splsm-mse x y f df)) df )) (def p (plot-points df mse)) (def p (plot-lines df (mapcar #'first cv))) (send p :adjust-to-data) (send p :add-lines df (mapcar #'second cv) :color 'red) (send p :adjust-to-data) (send p :add-lines df (mapcar #'third cv) :color 'blue) (send p :adjust-to-data) |# (defun SPLSM-CVSS (x y df) "Returns list of cvss, gcvss, usual ss" (let* ((sii (diagonal (splsm-matrix x df))) (f^ (splsm x y :df df)) ( e (- y f^)) ) (list (sum (^ (/ e (- 1 sii)) 2)) (sum (^ (/ e (- 1 (/ df (length x)))) 2)) (sum (^ e 2)) ) )) (defun SPLSM-MATRIX (x df) "Computes the projection matrix of the spline with x given." (let* ((n (length x)) (splsm (send splsm-proto :new x (repeat 0 n))) ) (apply #'bind-columns (mapcar #'(lambda(y) (send splsm :replace-y y) (send splsm :find-smooth :df df) (send splsm :smooth)) (column-list (identity-matrix n)) ) ) )) (defun TRACE (m) (sum (diagonal m))) (defun GET-ROW (m row) (let ((n (array-dimension m 1)) ) (coerce (compound-data-seq (select m row (iseq n))) 'list) )) (defun SPLSM-MSE (x y f df) ; MSE of spline approximation to f based on data in cols x y. (let* ((f^ (splsm x y :df df)) (err (- f f^)) (mse (sum (* err err))) ) (format t "With df = ~a and n = ~a, MSE = ~a. ~%" df (length x) mse) mse )) (defun POLY-FIT (x y df) (let ((m (regression-model (outer-product x (iseq 1 (1- df)) #'^) y :print nil)) ) (format t "Coefs = ~a~%" (send m :coef-estimates)) (send m :fit-values))) (defun POLY-MSE (x y f df) ; MSE of polynomial approximation to f based on data in cols x y. (let* ((err (- f (poly-fit x y df))) (mse (sum (* err err))) ) (format t "With df = ~a, MSE = ~a~%" df mse) mse)) #| (def b25 '(0.4493 0.1995 0.0936 0.0448 0.02235 0.0116 0.00655 0.00375)) (def b50 '(0.8607 0.3904 0.1792 0.0856 0.04452 0.0233 0.01300 0.00757)) (def b100 '(1.6897 0.7603 0.3513 0.1680 0.08588 0.0463 0.02603 0.01519)) (def b b50) (def p (plot-lines df b)) --> suggests 6 df for sigma = 0.3 (send p :add-lines df (+ b (* .09 df))) (send p :adjust-to-data) (def n 25) (def x (rseq 0 1 n)) (def y (repeat 0 n)) (setf (select y 0) 1) ; (def splsm (send splsm-proto :new x y)) (send splsm :collapse) (send splsm :find-knots) (send splsm :find-smooth :df 8) (def p (plot-points x y)) (send p :add-lines x (send splsm :smooth)) (send splsm :find-deriv (rseq .1 .9 10) 1) |# #| Traces S S'S S'S'S (S'S)(S'S) n= 25: 3.00044 2.50145 2.20595 equally spaced n= 50: 3.00035 2.50058 2.20516 n= 75: n= 25: 5.99758 4.75623 4.28817 4.01493 equally spaced n= 50: 6.00067 4.75225 4.28364 4.01021 very little change with n n=100: 6.00092 4.75189 4.28302 4.00949 n= 25 6.00182 4.76403 4.01806 (beta 3,3) Suggest use n= 25 6.00163 4.76021 4.01845 (beta 1,5) equally spaced n= 25: 6.00369 4.76204 4.01870 (beta 5,2) results. n= 50: 11.99517 9.51246 8.02987 (pair,6 df each,equal sp) n= 25: 5.99758 4.75623 4.01493 (differences sep - pool) Trace of S2 as function of df: n=50: df = 2 3 4 5 6 7 8 9 10 11 12 (def tr '(2 2.50 3.252 4.001 4.7525 5.5048 6.2563 7.0087 7.763 8.511 9.2718)) (def n 25) ; For use with unequal spacing (def x (rseq 0 0.995 n)) (def y (uniform-rand n)) ; (def p (plot-points x y)) (def sm (splsm-matrix x 6)) ; Regular spacing (def x (rseq 0 1 n)) (def sm (splsm-matrix x 6)) (traces sm) (def sep (splsm-matrix-2 25 6)) (def x (rseq 0 1 25)) (def sm (splsm-matrix x 6)) (def pool (bind-rows (bind-columns sm sm) (bind-columns sm sm))) (def pool (/ pool 2)) (def diff (- sep pool)) (traces diff) |# (defun traces (mat) (let ((ss (cross-product sm))) (format t " n=~3d: ~7,5f ~7,5f ~%" (first (array-dimensions sm)) (sum (diagonal sm)) (sum (diagonal ss)) ;(sum (diagonal (matmult ss sm))) ;(sum (diagonal (matmult ss ss))) ))) ; (def p (plot-lines x (array-col sm 0))) ; (send p :add-lines x (array-col sm 50)) ; (send p :add-lines x (array-col sm 0)) (defun mse (f sd df &key print?) "Estimates MSE of smoothing with df: mean f, noise variance s2." (let* ((n (length x)) (fHat (send theSpline :find-smooth :df df)) (fHat (send theSpline :smooth)) (resid (- f fHat)) (bias (sum (* resid resid))) (tr (+ .25 (* .75 df))) (var (* (* sd sd) tr)) (mse (+ bias var)) ) (if print? (format t "(sd=~5,3f,df=~2d): bias= ~6,3f var= ~6,3f mse= ~6,3f.~%" sd df bias var mse)) mse)) ; (mapcar #'(lambda (df) (mse df)) '(2 2.25 2.5)) ; (golden #'mse (list 5 7) :tol 0.001) ; ; Summary of optimal sd for sine plus noise (defun data () ; n = 50 (def sd '( 0.025 0.05 .1 .2 .3 .4 .8 1.6 2 2.4 2.6 3.2 )) (def df50 '(10.885 9.350 8.046 6.948 6.373 5.997 5.213 4.344 3.965 3.648 3.444 2.00)) (def df75 '(11.337 9.801 8.392 7.297 6.650 6.251 5.329 4.530 4.296 3.994 3.855 2)) (def mse50 '(.00584 .0203 .070 .243 .504 .846 2.936 10.08 14.88 20.38 23.29 28.43)) (def mse75 '(.00612 .0211 .0731 .253 .525 .880 3.055 10.53 15.64 21.53 24.72 17.98)) ) ; (scatterplot-matrix (list sd df (sqrt mse))) ; (plot-points sd (sqrt mse)) ; almost linear ; (def p (plot-points (log sd) (log df75))) ; (send p :add-points (log sd) (log df50) :symbol 'cross) ; (def x (rseq 0 (* 2 pi) 75)) ; (def f (sin x)) ; (def theSpline (send splsm-proto :new x f)) (defun optimal-df (sd) (let* ((sdVec '(0.025 0.05 .1 .2 .3 .4 .8 1.6 2 2.4 2.6 3.2)) (dfVec '(11 9 8 7 6 6 5 4 4 3.5 3.5 2.00)) (dfInt (+ '(-2 2) (select dfVec (first (which (= sd sdVec)))))) (opt (golden #'(lambda (df) (mse f sd df)) dfInt :tol 0.001)) (fVal (second opt)) ) (if (< 0.001 (abs (- (first fVal) (second fVal)))) (format t "Warning: f vals differ, ~a~%" fVal)) (mapcar #'mean opt) )) ; (optimal-df 0.3) ; (mapcar #'optimal-df '(0.025 0.05 .1 .2 .3 .4 .8 1.6 2 2.4 2.6 3.2)) ; ; (defun splsm-matrix-2 (n df) "Computes projection associated data in half, n & df in each half." (let* ((x (rseq 0 1 n)) (sm (splsm-matrix x df)) (zero (make-array (list n n) :initial-element 0)) ) (bind-rows (bind-columns sm zero) (bind-columns zero sm)) ))