Source code for PyGT.GT

# -*- coding: utf-8 -*-
r"""
Iteratively remove nodes from a Markov chain with graph transformation
----------------------------------------------------------------------

This module implements the graph transformation algorithm to eliminate
nodes from a discrete- or continuous-time Markov chain. When the removed nodes
are chosen judiciously, the resulting network is less sparse, of lower dimensionality,
and is generally better-conditioned. See `PyGT.tools` for various tools
which help select nodes to eliminate, namely, ranking nodes based on their mean
waiting times and equilibrium occupation probabilities. [Kannan20b]_

The graph transformation algorithm requires a branching probability matrix
:math:`\textbf{B}` with elements :math:`B_{ij}=k_{ij}{\tau}_j` where :math:`k_{ij}` is
the :math:`i \leftarrow j` inter-microstate transition rate and :math:`\tau_j`
is the mean waiting time of node :math:`j`. In a discrete-time Markov chain,
:math:`\textbf{B}` is replaced with the discrete-time transition probability
matrix :math:`\textbf{T}(\tau)` and the waiting times of all nodes are uniform,
equal to the lag time :math:`\tau`.

In each iteration of GT, a single node :math:`x` is removed, and the branching
probabilities and waiting times of the neighboring nodes are updated according
to

.. math::

	\begin{eqnarray}
		B_{ij}^{\prime} &\leftarrow B_{ij} + \frac{B_{ix}B_{xj}}{1-B_{xx}} \\
		\tau_j^\prime  &\leftarrow \tau_j + \frac{B_{xj}\tau_j}{1-B_{xx}}
	\end{eqnarray}

A matrix version of the above equations permits the removal of blocks of nodes
simulatenously. The code monitors for stability in the inversion
algorithm, to allow for for one-by-one node removal instead if a block is
ill-conditioned. [Swinburne20a]_

"""

import os,time
import numpy as np

from scipy.sparse import csgraph, csr_matrix, lil_matrix,eye, save_npz, load_npz, diags,kron, issparse
from scipy.sparse.linalg import spsolve,inv

""" test for ipython environment (is this being loaded from a notebook) """
try:
	__IPYTHON__
except NameError:
	in_notebook = False
else:
	in_notebook = True

""" test for tqdm progress bars """

try:
	if in_notebook:
		from tqdm import tqdm_notebook as tqdm
	else:
		from tqdm import tqdm
	has_tqdm=True
except:
	has_tqdm=False



[docs]def blockGT(rm_vec,B,tau,block=20,order=None,rates=False,Ndense=50,screen=False,cond_thresh=1.0e13):

	r"""
	Main function for GT code, production of a reduced matrix by graph transformation.

	Parameters
	----------
	rm_vec:	(N,) array-like, bool
			Boolean array of which nodes to remove

	B:		(N,N) dense or sparse matrix
			Matrix of branching probabilities (CTMC) or transition probabilities (DTMC)

	tau:	(N,) array-like
			Array of waiting times (CTMC) or lag times (DTMC)

	block:	int, optional
			Number of node to attempt to remove simultaneously. Default = 20

	order: 	(N,) array-like, optional
			Order in which to remove nodes. Default ranks on node connectivity.
			Modify with caution: large effect on performance

	rates: 	bool, optional
			Whether to return the GT-reduced rate matrix in addition to B and tau.
			Only vaid for CTMC case. Default = False

	Ndense: int, optional
			Force switch to dense representation if N<Ndense. Default = 50

	screen: bool, optional
			Whether to print progress of GT. Default = False

	cond_thresh: float, optional
			Threshold condition number below which block matrix inversion
			is attempted. If block condition number is too high, conventional
			GT is used to remove nodes, which is less efficient but more stable.
			Default= ``1e13``
	Returns
	-------
	B:		(N',N') dense or sparse matrix
			Matrix of N'<N renormalized branching probabilities.
			Will be returned as same type (sparse/dense) as input

	tau:	(N',) array-like
	     	Array of N'<N renormalized waiting times

	K:		(N',N') dense or sparse matrix (same type as B)
			Matrix of N'<N renormalized transition rates. Only if ``rates=True``


	"""
	dense = not issparse(B)
	force_sparse = issparse(B)

	if dense:
		density = B[B>0.0].sum()/float(B.size)

	retry=0

	N = rm_vec.size
	pN = N
	#total number of states to remove
	NI = rm_vec.sum()
	D = 1.0 / tau


	if screen and has_tqdm:
		pbar = tqdm(total=NI,leave=True,mininterval=0.0,desc='GT')
	tst = 0.0
	tmt = 0.0
	tc = 0

	pass_over = np.empty(0,bool)
	pobar = None

	dense_onset = 0
	if dense:
		dense_onset = B.shape[0]
	if screen:
		t = time.time()
	while NI>0:

		if N<Ndense and not dense:
			#when network is small enough, more efficient to switch to dense format
			dense = True
			#nnz is number of stored values in B, including explicit zeros
			density = float(B.nnz) /  float( B.shape[0]*B.shape[1])
			dense_onset = B.shape[0]
			B = B.todense()

		rm = np.zeros(N,bool)
		if pass_over.sum()>0:
			Bd = np.ravel(B.diagonal())
			Bd[~pass_over] = Bd.min()-1.0
			Bd[~rm_vec] = Bd.min()-1.0
			rm[Bd.argmax()] = True
			if not pobar is None:

				pobar.update(1)
		else:
			#order the nodes to remove
			if order is None:
				if not dense:
					#order contains number of elements in each row
					#equivalent to node in-degree
					order = B.indptr[1:]-B.indptr[:-1]
				else:
					order = np.linspace(0.,1.0,B.shape[0])
			if not pobar is None:
				pobar.close()
			pobar = None
			order[~rm_vec] = order.max()+1
			rm[order.argsort()[:min(block,NI)]] = True

		B, tau, success = singleGT(rm,B,tau,cond_thresh)

		if success:
			if screen and has_tqdm:
				pbar.update(rm.sum())
			N -= rm.sum()
			NI -= rm.sum()
			rm_vec = rm_vec[~rm]
			if not (order is None):
				order = order[~rm]
			if pass_over.sum()>0:
				pass_over = pass_over[~rm]
			rmb = 1 + (block-1)*int(pass_over.sum()==0)
		else:
			pass_over = rm
			rmb = 1
			retry += 1
			if screen and has_tqdm:
				pobar = tqdm(total=rm.sum(),leave=True,mininterval=0.0,desc="1-by-1 GT subloop for stability")
	if screen and has_tqdm:
		pbar.close()

	if dense and screen:
		print("GT BECAME DENSE AT N=%d, density=%f" % (dense_onset,density))

	# revert back to sparse for final clean up
	B = csr_matrix(B)

	B.eliminate_zeros()

	Bd = np.ravel(B.diagonal()) # only the diagonal (Bd_x = B_xx)
	Bn = B - diags(Bd) # B with no diagonal (Bn_xx = 0, Bn_xy = B_xy)
	Bn.eliminate_zeros()
	Bnd = np.ravel(Bn.sum(axis=0)) # Bnd_x = sum_x!=y B_yx = 1-B_xx
	nBd = np.zeros(N)
	nBd[Bd>0.99] = Bnd[Bd>0.99]
	nBd[Bd<0.99] = 1.0-Bd[Bd<0.99]
	omB = diags(nBd) - Bn # 1-B
	tau = np.ravel(tau.flatten())
	if dense:
		B = B.todense()
	if rates:
		K = -omB.dot(diags(1.0/tau)) # (B-1).D = K ( :) )
		if dense:
			K = K.todense()

	if screen:
		print("GT removed %d nodes in %2.2g seconds with %d floating point corrections" % (pN-N,time.time()-t,retry))

	if rates:
		return B,tau,K
	return B,tau


[docs]def singleGT(rm_vec,B,tau,cond_thresh=1.0e13):
	r"""
	Single iteration of GT algorithm used by main GT function.
	Either removes a single node with float precision correction [Wales09]_
	or attemps node removal via	matrix inversion [Swinburne20a]_. In the latter
	case, if an error is raised by ``np.linalg.inv`` this is communicated
	through ``success``

	Parameters
	----------
	rm_vec:	(N,) array-like, bool
			Boolean array of which nodes to remove

	B:		(N,N) dense or sparse matrix
			Matrix of branching probabilities

	tau:	(N,) array-like
	     	Array of waiting times

	cond_thresh: float, optional
			Threshold condition number below which matrix inversion is attempted.
			If condition number is higher than ``cond_thresh``,
			``singleGT()`` returns ``success=False``.
			Default= ``1.0e13``
	Returns
	-------
	B:		(N',N') dense or sparse matrix
			Matrix of N'<N renormalized branching probabilities

	tau:	(N',) array-like
	     	Array of N'<N renormalized waiting times

	success: bool
			 False if estimated condition number is too high or
			 ``LinAlgError`` raised by ``np.linalg.inv``

	"""

	dense = not issparse(B)

	Bxx = B[rm_vec,:][:,rm_vec]
	Bxj = B[rm_vec,:][:,~rm_vec]
	Bix = B[~rm_vec,:][:,rm_vec]
	Bij = B[~rm_vec,:][:,~rm_vec]
	iDjj = tau[~rm_vec]
	iDxx = tau[rm_vec]

	if rm_vec.sum()>1: # Try block GT
		# Get Bxx
		Bd = np.ravel(Bxx.diagonal())

		# Get sum Bix for i not equal x
		Bxxnd = Bxx - np.diag(Bd)
		Bs = np.ravel(Bix.sum(axis=0))
		Bs += np.ravel(Bxxnd.sum(axis=0))

		# Float
		Bs[Bd<0.99] = 1.0-Bd[Bd<0.99]
		iGxx = np.diag(Bs) - Bxxnd
		if np.linalg.cond(iGxx)>cond_thresh:
			return B,tau,False
		try:
			Gxx = np.linalg.inv(iGxx)
		except np.linalg.LinAlgError as err:
			return B,tau,False
		iDG = Gxx.transpose().dot(iDxx).transpose()
		iDjj += np.ravel(Bxj.transpose().dot(iDG)).flatten()

		# This is computational bottleneck
		if dense:
			Bij += Bix@Gxx@Bxj
		else:
			Bij += Bix@csr_matrix(Gxx)@Bxj

		return Bij,iDjj,True
	else: # Standard GT
		Bxx = Bxx.sum()
		if Bxx>0.99:
			b_xx = Bix.sum()
		else:
			b_xx = 1.0-Bxx

		if dense:
			iDjj = iDjj.flatten()
			Bxj = np.ravel(Bxj.flatten() / b_xx)
			Bix = np.ravel(Bix.flatten())
			Bij += np.outer(Bix,Bxj)
			iDjj += iDxx.sum() * Bxj
		else:
			Bxj.data /= b_xx
			Bij += kron(Bix,Bxj)
			iDjj[Bxj.indices] += Bxj.data*iDxx

		return Bij,iDjj,True