Source code for PyGT.GT

# -*- coding: utf-8 -*-
r"""
Iteratively remove nodes from a Markov chain with graph transformation
----------------------------------------------------------------------

This module implements the graph transformation algorithm to eliminate
nodes from a discrete- or continuous-time Markov chain. When the removed nodes
are chosen judiciously, the resulting network is less sparse, of lower dimensionality,
and is generally better-conditioned. See `PyGT.tools` for various tools
which help select nodes to eliminate, namely, ranking nodes based on their mean
waiting times and equilibrium occupation probabilities. [Kannan20b]_

The graph transformation algorithm requires a branching probability matrix
:math:`\textbf{B}` with elements :math:`B_{ij}=k_{ij}{\tau}_j` where :math:`k_{ij}` is
the :math:`i \leftarrow j` inter-microstate transition rate and :math:`\tau_j`
is the mean waiting time of node :math:`j`. In a discrete-time Markov chain,
:math:`\textbf{B}` is replaced with the discrete-time transition probability
matrix :math:`\textbf{T}(\tau)` and the waiting times of all nodes are uniform,
equal to the lag time :math:`\tau`.

In each iteration of GT, a single node :math:`x` is removed, and the branching
probabilities and waiting times of the neighboring nodes are updated according
to

.. math::

	\begin{eqnarray}
		B_{ij}^{\prime} &\leftarrow B_{ij} + \frac{B_{ix}B_{xj}}{1-B_{xx}} \\
		\tau_j^\prime  &\leftarrow \tau_j + \frac{B_{xj}\tau_j}{1-B_{xx}}
	\end{eqnarray}

A matrix version of the above equations permits the removal of blocks of nodes
simulatenously. The code monitors for stability in the inversion
algorithm, to allow for for one-by-one node removal instead if a block is
ill-conditioned. [Swinburne20a]_

"""

import os,time
import numpy as np

from scipy.sparse import csgraph, csr_matrix, lil_matrix,eye, save_npz, load_npz, diags,kron, issparse
from scipy.sparse.linalg import spsolve,inv

""" test for ipython environment (is this being loaded from a notebook) """
try:
	__IPYTHON__
except NameError:
	in_notebook = False
else:
	in_notebook = True

""" test for tqdm progress bars """

try:
	if in_notebook:
		from tqdm import tqdm_notebook as tqdm
	else:
		from tqdm import tqdm
	has_tqdm=True
except:
	has_tqdm=False



[docs]def blockGT(rm_vec,B,tau,block=20,order=None,rates=False,Ndense=50,screen=False,cond_thresh=1.0e13): r""" Main function for GT code, production of a reduced matrix by graph transformation. Parameters ---------- rm_vec: (N,) array-like, bool Boolean array of which nodes to remove B: (N,N) dense or sparse matrix Matrix of branching probabilities (CTMC) or transition probabilities (DTMC) tau: (N,) array-like Array of waiting times (CTMC) or lag times (DTMC) block: int, optional Number of node to attempt to remove simultaneously. Default = 20 order: (N,) array-like, optional Order in which to remove nodes. Default ranks on node connectivity. Modify with caution: large effect on performance rates: bool, optional Whether to return the GT-reduced rate matrix in addition to B and tau. Only vaid for CTMC case. Default = False Ndense: int, optional Force switch to dense representation if N<Ndense. Default = 50 screen: bool, optional Whether to print progress of GT. Default = False cond_thresh: float, optional Threshold condition number below which block matrix inversion is attempted. If block condition number is too high, conventional GT is used to remove nodes, which is less efficient but more stable. Default= ``1e13`` Returns ------- B: (N',N') dense or sparse matrix Matrix of N'<N renormalized branching probabilities. Will be returned as same type (sparse/dense) as input tau: (N',) array-like Array of N'<N renormalized waiting times K: (N',N') dense or sparse matrix (same type as B) Matrix of N'<N renormalized transition rates. Only if ``rates=True`` """ dense = not issparse(B) force_sparse = issparse(B) if dense: density = B[B>0.0].sum()/float(B.size) retry=0 N = rm_vec.size pN = N #total number of states to remove NI = rm_vec.sum() D = 1.0 / tau if screen and has_tqdm: pbar = tqdm(total=NI,leave=True,mininterval=0.0,desc='GT') tst = 0.0 tmt = 0.0 tc = 0 pass_over = np.empty(0,bool) pobar = None dense_onset = 0 if dense: dense_onset = B.shape[0] if screen: t = time.time() while NI>0: if N<Ndense and not dense: #when network is small enough, more efficient to switch to dense format dense = True #nnz is number of stored values in B, including explicit zeros density = float(B.nnz) / float( B.shape[0]*B.shape[1]) dense_onset = B.shape[0] B = B.todense() rm = np.zeros(N,bool) if pass_over.sum()>0: Bd = np.ravel(B.diagonal()) Bd[~pass_over] = Bd.min()-1.0 Bd[~rm_vec] = Bd.min()-1.0 rm[Bd.argmax()] = True if not pobar is None: pobar.update(1) else: #order the nodes to remove if order is None: if not dense: #order contains number of elements in each row #equivalent to node in-degree order = B.indptr[1:]-B.indptr[:-1] else: order = np.linspace(0.,1.0,B.shape[0]) if not pobar is None: pobar.close() pobar = None order[~rm_vec] = order.max()+1 rm[order.argsort()[:min(block,NI)]] = True B, tau, success = singleGT(rm,B,tau,cond_thresh) if success: if screen and has_tqdm: pbar.update(rm.sum()) N -= rm.sum() NI -= rm.sum() rm_vec = rm_vec[~rm] if not (order is None): order = order[~rm] if pass_over.sum()>0: pass_over = pass_over[~rm] rmb = 1 + (block-1)*int(pass_over.sum()==0) else: pass_over = rm rmb = 1 retry += 1 if screen and has_tqdm: pobar = tqdm(total=rm.sum(),leave=True,mininterval=0.0,desc="1-by-1 GT subloop for stability") if screen and has_tqdm: pbar.close() if dense and screen: print("GT BECAME DENSE AT N=%d, density=%f" % (dense_onset,density)) # revert back to sparse for final clean up B = csr_matrix(B) B.eliminate_zeros() Bd = np.ravel(B.diagonal()) # only the diagonal (Bd_x = B_xx) Bn = B - diags(Bd) # B with no diagonal (Bn_xx = 0, Bn_xy = B_xy) Bn.eliminate_zeros() Bnd = np.ravel(Bn.sum(axis=0)) # Bnd_x = sum_x!=y B_yx = 1-B_xx nBd = np.zeros(N) nBd[Bd>0.99] = Bnd[Bd>0.99] nBd[Bd<0.99] = 1.0-Bd[Bd<0.99] omB = diags(nBd) - Bn # 1-B tau = np.ravel(tau.flatten()) if dense: B = B.todense() if rates: K = -omB.dot(diags(1.0/tau)) # (B-1).D = K ( :) ) if dense: K = K.todense() if screen: print("GT removed %d nodes in %2.2g seconds with %d floating point corrections" % (pN-N,time.time()-t,retry)) if rates: return B,tau,K return B,tau
[docs]def singleGT(rm_vec,B,tau,cond_thresh=1.0e13): r""" Single iteration of GT algorithm used by main GT function. Either removes a single node with float precision correction [Wales09]_ or attemps node removal via matrix inversion [Swinburne20a]_. In the latter case, if an error is raised by ``np.linalg.inv`` this is communicated through ``success`` Parameters ---------- rm_vec: (N,) array-like, bool Boolean array of which nodes to remove B: (N,N) dense or sparse matrix Matrix of branching probabilities tau: (N,) array-like Array of waiting times cond_thresh: float, optional Threshold condition number below which matrix inversion is attempted. If condition number is higher than ``cond_thresh``, ``singleGT()`` returns ``success=False``. Default= ``1.0e13`` Returns ------- B: (N',N') dense or sparse matrix Matrix of N'<N renormalized branching probabilities tau: (N',) array-like Array of N'<N renormalized waiting times success: bool False if estimated condition number is too high or ``LinAlgError`` raised by ``np.linalg.inv`` """ dense = not issparse(B) Bxx = B[rm_vec,:][:,rm_vec] Bxj = B[rm_vec,:][:,~rm_vec] Bix = B[~rm_vec,:][:,rm_vec] Bij = B[~rm_vec,:][:,~rm_vec] iDjj = tau[~rm_vec] iDxx = tau[rm_vec] if rm_vec.sum()>1: # Try block GT # Get Bxx Bd = np.ravel(Bxx.diagonal()) # Get sum Bix for i not equal x Bxxnd = Bxx - np.diag(Bd) Bs = np.ravel(Bix.sum(axis=0)) Bs += np.ravel(Bxxnd.sum(axis=0)) # Float Bs[Bd<0.99] = 1.0-Bd[Bd<0.99] iGxx = np.diag(Bs) - Bxxnd if np.linalg.cond(iGxx)>cond_thresh: return B,tau,False try: Gxx = np.linalg.inv(iGxx) except np.linalg.LinAlgError as err: return B,tau,False iDG = Gxx.transpose().dot(iDxx).transpose() iDjj += np.ravel(Bxj.transpose().dot(iDG)).flatten() # This is computational bottleneck if dense: Bij += Bix@Gxx@Bxj else: Bij += Bix@csr_matrix(Gxx)@Bxj return Bij,iDjj,True else: # Standard GT Bxx = Bxx.sum() if Bxx>0.99: b_xx = Bix.sum() else: b_xx = 1.0-Bxx if dense: iDjj = iDjj.flatten() Bxj = np.ravel(Bxj.flatten() / b_xx) Bix = np.ravel(Bix.flatten()) Bij += np.outer(Bix,Bxj) iDjj += iDxx.sum() * Bxj else: Bxj.data /= b_xx Bij += kron(Bix,Bxj) iDjj[Bxj.indices] += Bxj.data*iDxx return Bij,iDjj,True