Coverage for lasso/dimred/graph_laplacian.py: 0%

47 statements  

« prev     ^ index     » next       coverage.py v7.2.4, created at 2023-04-28 18:42 +0100

1from typing import Union 

2 

3import numpy as np 

4from scipy.sparse import csgraph, dok_matrix 

5from scipy.sparse.linalg import eigsh 

6from sklearn.neighbors import KDTree 

7 

8 

9def run_graph_laplacian( 

10 points: np.ndarray, 

11 n_eigenmodes: int = 5, 

12 min_neighbors: Union[int, None] = None, 

13 sigma: Union[float, None] = None, 

14 search_radius: Union[float, None] = None, 

15): 

16 """ 

17 Compute a graph laplacian. 

18 

19 Parameters 

20 ---------- 

21 points : np.ndarray 

22 points with features 

23 n_eigenmodes : int 

24 number of eigenmodes to compute 

25 min_neighbors : int 

26 The minimum number of neighbors of a point to be considered for the laplacian 

27 weights. Can be used to avoid unconnected points. 

28 sigma : float 

29 The standard deviation of the gaussian normal distribution function used to 

30 transform the distances for the inverse distance based weighting. 

31 search_radius: 

32 

33 

34 Returns 

35 ------- 

36 eigenvalues : np.ndarray 

37 eigenvalues from the graph 

38 eigenvectors : np.ndarray 

39 eigenvectors with shape (n_points x n_eigenvectors) 

40 """ 

41 with np.warnings.catch_warnings(): 

42 regex_string = ( 

43 r"the matrix subclass is not the recommended way to represent" 

44 + r"matrices or deal with linear algebra" 

45 ) 

46 np.warnings.filterwarnings("ignore", regex_string) 

47 lapl = _laplacian_gauss_idw(points, min_neighbors, sigma, search_radius) 

48 return _laplacian(lapl, n_eigenmodes) 

49 

50 

51def _laplacian_gauss_idw( 

52 points: np.ndarray, 

53 min_neighbors: Union[int, None] = None, 

54 sigma: Union[float, None] = None, 

55 search_radius: Union[float, None] = None, 

56): 

57 """ 

58 Calculates the laplacian matrix for the sample points of a manifold. The inverse 

59 of the gauss-transformed distance is used as weighting of the neighbors. 

60 

61 Parameters 

62 ---------- 

63 points: array-like, shape (n_points, n_components) : 

64 The sampling points of a manifold. 

65 min_neighbors: int 

66 The minimum number of neighbors of a point to be considered for the laplacian 

67 weights. Can be used to avoid unconnected points. 

68 sigma: float 

69 The standard deviation of the gaussian normal distribution function used to 

70 transform the distances for the inverse distance based weighting. 

71 search_radius : float 

72 radius search parameter for nearest neighbors 

73 

74 Returns 

75 ------- 

76 L: array-like, shape (n_points, n_points) 

77 The laplacian matrix for manifold given by its sampling `points`. 

78 """ 

79 assert 2 == points.ndim 

80 

81 if min_neighbors is None: 

82 min_neighbors = points.shape[1] 

83 

84 tree = KDTree(points) 

85 

86 if sigma is None: 

87 d, _ = tree.query(points, 2 + 2 * points.shape[1], return_distance=True) 

88 sigma = np.sum(d[:, -2:]) 

89 sigma /= 3 * len(points) 

90 

91 if search_radius is None: 

92 search_radius = 3 * sigma 

93 

94 graph = dok_matrix((len(points), len(points)), dtype=np.double) 

95 

96 for i, (j, d, e, k) in enumerate( 

97 zip( 

98 *tree.query_radius(points, return_distance=True, r=search_radius), 

99 *tree.query(points, return_distance=True, k=1 + min_neighbors) 

100 ) 

101 ): 

102 # Always search for k neighbors, this prevents strongly connected local areas 

103 # a little, attracting the eigenfield 

104 

105 d, j = e, k 

106 k = j != i 

107 d, j = d[k], j[k] 

108 d **= 2 

109 d /= -2 * sigma**2 

110 graph[i, j] = d = np.exp(d) 

111 graph[j, i] = d[:, np.newaxis] 

112 

113 assert 0 == (graph != graph.T).sum() 

114 

115 return csgraph.laplacian(graph, normed=True) 

116 

117 

118def _laplacian(lapl: csgraph, n_eigenmodes: int = 5): 

119 """ 

120 Compute the laplacian of a graph L 

121 

122 Parameters 

123 ---------- 

124 L : csgraph 

125 sparse cs graph from scipy 

126 n_eigenmodes : int 

127 number of eigenmodes to compute 

128 points : np.ndarray 

129 coordintes of graph nodes (only for plotting) 

130 

131 Returns 

132 ------- 

133 eigen_values : np.ndarray 

134 eingenvalues of the graph 

135 eigen_vecs : np.ndarray 

136 eigenvectors of each graph vector (iNode x nEigenmodes) 

137 """ 

138 

139 n_nonzero_eigenvalues = 0 

140 n_eigenvalues = int(n_eigenmodes * 1.5) 

141 

142 eigen_vals = np.empty((0,)) 

143 eigen_vecs = np.empty((0, 0)) 

144 

145 while n_nonzero_eigenvalues < n_eigenmodes: 

146 

147 eigen_vals, eigen_vecs = map(np.real, eigsh(lapl, n_eigenvalues, which="SA")) 

148 

149 i_start = np.argmax(eigen_vals > 1e-7) 

150 n_nonzero_eigenvalues = len(eigen_vals) - i_start 

151 

152 if n_nonzero_eigenvalues >= n_eigenmodes: 

153 eigen_vecs = eigen_vecs[:, i_start : i_start + n_eigenmodes] 

154 eigen_vals = eigen_vals[i_start : i_start + n_eigenmodes] 

155 

156 n_eigenvalues = int(n_eigenvalues * 1.5) 

157 

158 return eigen_vals, eigen_vecs