########
#
# Function that repackages 'kmeans()' so the initialization
#   is a random selection of points from the data matrix.
#   (There is also a slight jitter applied to the initialization
#    because 'kmeans()' barfs when there are exact ties, as
#    occur with non-zero probability when the data are bootstrap
#    samples.)
#
    k.means <- function(x, K.or.align) {
      N <- nrow(x)
      p <- ncol(x)
      if(length(K.or.align)==1) {
        align <- NULL
        K <- K.or.align
      } else {
        align <- K.or.align
        K <- length(unique(align))
      }
      km <- kmeans(x,
                   iter.max=1000,
                   centers = x[sample(N, size=K),]
                   ) 
      if(!is.null(align)) {  #        km <- align.kmeans(align, km)
        segs.u <- sort(union(align, km$cluster))
        segs.n <- length(segs.u)
        seg <- km$clust
        tab <- table(factor(align,    levels=segs.u),
                     factor(km$clust, levels=segs.u))
        ord <- rep(NA,segs.n)
        for(i in 1:segs.n) {
          max.tab <- max(tab)
          max.row <- row(tab)[tab==max.tab][1]
          max.col <- col(tab)[tab==max.tab][1]
          ord[max.col] <- max.row
          tab[max.row,] <- -Inf;  tab[,max.col] <- -Inf
        }
        km$cluster <- ord[km$cluster]
        km$centers <- km$centers[order(ord),]  # inverse mapping: order(ord)
      }
      return(km)
    }

########
#
# Function to align new segmentations 'segs' with an old segmentation 'sego':
#   IMPORTANT: 'segs' can have multiple segmentations, one per ROW!
#
align.segments <- function(sego, segs, ret="seg") { # ret="ord", ret="both"
  if(is.null(dim(segs))) segs <- matrix(segs, nrow=1)
  segs <- as.matrix(segs)
  if(ncol(segs)!=length(sego)) {
    cat("align.segments(): incompatible sizes of segmentations \n"); stop() }
#
  segs.u <- sort(union(sego, segs))
  segs.n <- length(segs.u)
  nSegn  <- nrow(segs)
  for(iseg in 1:nSegn) {
    seg <- segs[iseg,]
    tab <- table(factor(sego, levels=segs.u),
                 factor(seg , levels=segs.u))
    ord <- rep(NA,segs.n);  names(ord) <- segs.u
    for(i in 1:segs.n) {
      max.tab <- max(tab)
      max.row <- row(tab)[tab==max.tab][1]
      max.col <- col(tab)[tab==max.tab][1]
      ord[max.col] <- max.row
      tab[max.row,] <- -Inf;  tab[,max.col] <- -Inf
    }
    segs[iseg,] <- ord[seg]
  }
  if(ret=="seg") return(segs) else
  if(ret=="ord") return(ord)  else
  return(list(seg=segs, ord=ord))
}
# Experiments for debugging:
#dat <- dat.all[,vars]
#dat <- dat.all[,vars.seg]
#sego <- segs.all
#segs <- random.restart(dat, nsegs=7, nrestarts=1)
#table(sego, segs)                             # original
#table(sego, segs=align.segments(sego, segs))  # diagonally matched
#
#
########
#
# Function to align list of kmeans outputs to a reference segmentation:
#
align.kmeans <- function(seg.ref, km.lst, verbose=200) {   
  segs.u <- sort(unique(seg.ref))
  segs.n <- length(segs.u)
  segs.f <- factor(seg.ref, levels=segs.u)
  if(!is.list(km.lst)) km.lst <- list(km.lst)
  for(iseg in 1:length(km.lst)) {
    seg <- km.lst[[iseg]]$clust
    tab <- table(segs.f, factor(seg, levels=segs.u))
    ord <- rep(NA,segs.n);  names(ord) <- segs.u
    for(i in 1:segs.n) {
      max.tab <- max(tab)
      max.row <- row(tab)[tab==max.tab][1]
      max.col <- col(tab)[tab==max.tab][1]
      ord[max.col] <- max.row
      tab[max.row,] <- -Inf;  tab[,max.col] <- -Inf
    }
    km.lst[[iseg]]$clust <- ord[seg]
    km.lst[[iseg]]$cent  <- km.lst[[iseg]]$cent[order(ord),]
    if(iseg%%verbose==0) cat(iseg," ")
  }
  cat("\n")
  if(length(km.lst)==1) return(km.lst[[1]]) else return(km.lst)
}

# try:
#   ctro <- mktg[sample(nrow(mktg),4),-9]
#   ctrn <- apply(ctro, 2, jitter, fac=.01)
#   as.matrix(dist(rbind(ctro,
#                        align.centers(ctro, ctrn, ret="ctr"))))[1:K,1:K+K]
#
withinss <- function(dat, segs) {
  N <- nrow(dat)
  segs.u <- sort(unique(c(segs)))
  segs.n <- length(segs.u)
  centers  <- matrix(NA, nrow=segs.n, ncol=ncol(dat))
  withinss <- rep(NA, segs.n)
  sizes <- table(factor(segs, levels=segs.u))
  for(i in 1:segs.n) {
    centers[i,] <- t(dat[segs==segs.u[i],]) %*% rep(1/sizes[i],sizes[i])
    withinss[i] <- sum((t(dat[segs==segs.u[i],])-centers[i,])^2)
  }
  return(withinss)
}
## sum(withinss(dat, mktg[,9]))
## sum(withinss(scale(mktg[,-9]), mktg.km.lst[[1]]$cluster))
## sum(mktg.km.lst[[1]]$within)

################################################################