#| class15.lsp - Matrix factorizations for rectangular matrices QR SVD |# ;--- Utility for building matrix of polynomials (defun POLY-MATRIX (x order) "Builds matrix of columns x,x^2,...." (apply #'bind-columns (mapcar #'(lambda (p) (^ x p)) (iseq 1 order)) )) ;--- Utilities for extracting a row or column as a list (defun AS-LIST (x) (coerce (compound-data-seq x) 'list)) (defun COL-AS-LIST (matrix j) (as-list (select matrix (iseq (array-dimension matrix 0)) j) )) (defun ROW-AS-LIST (matrix i) (as-list (select matrix i (iseq (array-dimension matrix 1))) )) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; ;; Generate data using Cholesky ;; ;; Tierney, Section 5.5, 168 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun CORR-NORMAL (n covMat) "Returns list of n normals with indicated covariance matrix." (let ((chol (chol-decomp covMat)) ) (if (zerop (second chol)) (let ((k (array-dimension covmat 0)) ) (mapcar #'(lambda (x) (matmult (first chol) x)) (split-list (normal-rand (* n k)) k))) (format t "Cov not psd; offset by ~a~%" (second chol)) ))) (def DATA (corr-normal 50 '#2a((1 .5 0) (.5 1 .5) (0 .5 1) ))) (print-matrix (setf cov (apply #'covariance-matrix (transpose DATA))) t :float-digits 2) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; ;; Decompositions: QR ;; ;; Tierney, Section 5.5, 168 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;--- Embodies the Gram-Schmidt process (pivoting is optional) (setf qr (qr-decomp data)) ; will not work with a list (def x (make-array '(50 3) :initial-contents data)) (setf qr (qr-decomp x)) ;--- Note the form of the orthogonality ... uncentered (def q (first qr)) (print-matrix (covariance-matrix q) t :float-digits 2) (print-matrix (matmult (transpose q) q)) ;--- Triangular structure (computed one row at a time) (def r (second qr)) (print-matrix r) ;--- Relationship to Cholesky for square (print-matrix (first (chol-decomp (matmult (transpose x) x)))) ;--- Now for regression: first make a y (def y (+ (matmult x '(1 1 1)) (normal-rand (array-dimension x 0)))) (def rm (regression-model x y)) (format t "Regr RSS ~a~%" (send rm :residual-sum-of-squares)) ;--- Do QR regression by hand (add 1 for constant term) (def qr (qr-decomp (bind-columns (repeat 1 (array-dimension x 0)) x))) (def q (first qr)) (def r (second qr)) (def c (matmult (transpose q) y)) (format t "Regr coefs ~a~%" (solve r c)) (def res (- y (matmult q c))) (format t "RSS = ~a~%" (inner-product res res)) ;--- Now for a very ill-conditioned problem ; -- Gen the data (def n 101) (def x (bind-columns (repeat 1 n) (poly-matrix (rseq 100 101 n) 3))) (def y (+ (matmult x '(1 1 1 1)) (normal-rand (array-dimension x 0)))) ; -- Compute with the algebraic formulas (def b^ (solve (matmult (transpose x) x) (matmult (transpose x) y))) (def resb (- y (matmult x b^))) ; -- Print it (format t "b^ = ~a~%" b^) (format t "RSS = ~a~%" (inner-product resb resb)) ; -- Compute with GS/QR decomp (def qr (qr-decomp x)) (def q (first qr)) (def r (second qr)) (def c^ (matmult (transpose q) y)) (def resc (- y (matmult Q c^))) ; -- Print it (format t "c^ = ~a~%" c^) (format t "b from c ~a ~%" (solve R c^)) (format t "RSS = ~a~%" (inner-product resc resc)) ; -- Compare resids... Differences look cubic! (def p (plot-points (iseq n) (- resc resb))) ;--- Keep it all in the QR (def k (array-dimension x 1)) (def qrxy (qr-decomp (bind-columns x y))) (def q2 (first qrxy)) (def r2 (second qrxy)) (format t "Last column of R:~% ~a~%" (col-as-list r2 k)) (format t "SS res ~a~%" (^ (aref r2 k k) 2)) (def res2 (col-as-list q2 k)) (plot-points res2 resc) ;--- Standard regression... What is it doing? (def rm (regression-model x y)) (format t "Regr RSS ~a~%" (send rm :residual-sum-of-squares)) ;--- Comparison program (defun OLS-1 (x y) (let* ((b (matmult (inverse (matmult (transpose x) x)) (transpose x) y)) (e (- y (matmult x b))) ) (list b (inner-product e e)) )) (defun OLS-2 (x y) (let* ((b (solve (matmult (transpose x) x) (matmult (transpose x) y))) (e (- y (matmult x b))) ) (list b (inner-product e e)) )) (defun QR-1 (x y) (let* ((qr (qr-decomp x)) (q (first qr)) (r (second qr)) (c (matmult (transpose q) y)) (e (- y (matmult q c))) ) (list (solve r c) (inner-product e e)) )) (defun QR-2 (x y) (let* ((qr (qr-decomp (bind-columns x y))) (q (first qr)) (r (second qr)) (k (array-dimension x 1)) (n (array-dimension x 0)) (c (select r (iseq k) k)) (e (select q (iseq n) k)) ) (list (solve (select r (iseq k) (iseq k)) c) (^ (select r k k) 2)) )) (defun COMPARE-REGR (x y) (flet ((print-it (name b rss) (format t "----------~% ~a: ~%" name) (format t "b = ~{ ~15a ~}~%" (as-list b)) (format t "rss = ~15af ~%~%" rss) )) (let* ((n (array-dimension x 0)) (x (bind-columns (repeat 1 n) x)) (k (array-dimension x 1)) ) (dolist (m '(ols-1 ols-2 qr-1 qr-2)) (apply #'print-it m (funcall m x y))) ))) (def c .9995) (def m 100000) (def x (+ m (apply #'bind-columns (transpose (corr-normal 50 (make-array '(3 3) :initial-contents (list (list 1 c c) (list c 1 c) (list c c 1)))))))) (def y (matmult x '(1 1 1))) (terpri) (terpri) (compare-regr x y) (def x (poly-matrix (rseq 100 101 50) 3)) (def x (poly-matrix (rseq 0 1 50) 3)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; ;; Decompositions: SVD ;; ;; Tierney, Section 5.5, 168 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (def x (apply #'bind-columns (transpose (corr-normal 50 '#2a((1 .5 0) (.5 1 .5) (0 .5 1) ))))) ;--- eigenvalues of X'X (def ev (eigen (matmult (transpose x) x))) (format t "Eigenvalues: ~a~%" (first ev)) (format t "First e-vector: ~a~%" (first (second ev))) ;--- Singular values (last element is T if alg converges) (def svd (sv-decomp x)) (format t "Singular values^2: ~a~%" (^ (second svd) 2)) (format t "First member of V: ~a~%" (col-as-list (third svd) 0)) ;--- Matrix norms and approximations (defun IP (m1 m2) (sum (diagonal (matmult m1 (transpose m2))))) (defun APPROX (mat m) (let* ((svd (sv-decomp mat)) (u (first svd)) (d (second svd)) (v (third svd)) (approx (make-array (array-dimensions mat) :initial-element 0.0)) ) (dotimes (i m) (setf approx (+ approx (* (select d i) (outer-product (col-as-list u i) (col-as-list v i) #'*))))) approx)) (format t "ip = ~a~%" (ip x x)) (format t "sum s^2 = ~a~%" (sum (^ (second svd) 2))) (def a (approx x 3)) (format t "ip error = ~a~%" (ip (- x a) (- x a))) (format t "s^2 = ~a~%" (^ (second svd) 2)) ;--- A least squares problem (for X with col mean zero) (defun DEMEAN (x) (- x (mean x))) (defun DEMEAN-MAT (m) (apply #'bind-columns (mapcar #'demean (column-list m)))) (def x (apply #'bind-columns (transpose (corr-normal 200 '#2a((1 .5 0) (.5 1 .5) (0 .5 1) ))))) (def x (demean-mat x)) (scatterplot-matrix (column-list x)) (def y (+ (matmult x (repeat 1 (array-dimension x 1))) (normal-rand (array-dimension x 0)) )) (def rm (regression-model x y :intercept nil)) (def noise (demean-mat (make-array (array-dimensions x) :initial-contents (normal-rand (apply #'* (array-dimensions x)))))) (def x* (+ noise x)) (def rm2 (regression-model x* y :intercept nil)) ;--- Total least squares (or just do via eigenvectors/values) (def svd (sv-decomp (bind-columns x* y))) (def v (third svd)) (print-matrix v t :float-digits 2) (format t "Singular values: ~a~%" (second svd)) (def ap (approx (bind-columns x y) 3)) (def coef (col-as-list v 3)) (format t "~{ ~12,9f ~}~%" (matmult ap coef)) (defun TOTAL-LEAST-SQUARES (x y) (let* (( k (array-dimension x 1)) (svd (sv-decomp (bind-columns x y))) ( v (col-as-list (third svd) k)) ( d (select (second svd) k)) ) (format t "Singular values are ~{ ~6,2f ~}~%" (coerce (second svd) 'list)) (/ v (- (select v k))) )) (format t "TLS: ~a~%" (total-least-squares x* y)) ;--- Check with other expression (format t "TLS another way ~a~%" (solve (- (matmult (transpose x*) x*) (* (select (second svd) 3) (identity-matrix 3))) (matmult (transpose x*) y)))