Coverage for lasso/dimred/graph

1from typing import Union

3import numpy as np

4from scipy.sparse import csgraph, dok_matrix

5from scipy.sparse.linalg import eigsh

6from sklearn.neighbors import KDTree

9def run_graph_laplacian(

10 points: np.ndarray,

11 n_eigenmodes: int = 5,

12 min_neighbors: Union[int, None] = None,

13 sigma: Union[float, None] = None,

14 search_radius: Union[float, None] = None,

15):

16 """

17 Compute a graph laplacian.

19 Parameters

20 ----------

21 points : np.ndarray

22 points with features

23 n_eigenmodes : int

24 number of eigenmodes to compute

25 min_neighbors : int

26 The minimum number of neighbors of a point to be considered for the laplacian

27 weights. Can be used to avoid unconnected points.

28 sigma : float

29 The standard deviation of the gaussian normal distribution function used to

30 transform the distances for the inverse distance based weighting.

31 search_radius:

34 Returns

35 -------

36 eigenvalues : np.ndarray

37 eigenvalues from the graph

38 eigenvectors : np.ndarray

39 eigenvectors with shape (n_points x n_eigenvectors)

40 """

41 with np.warnings.catch_warnings():

42 regex_string = (

43 r"the matrix subclass is not the recommended way to represent"

44 + r"matrices or deal with linear algebra"

45 )

46 np.warnings.filterwarnings("ignore", regex_string)

47 lapl = _laplacian_gauss_idw(points, min_neighbors, sigma, search_radius)

48 return _laplacian(lapl, n_eigenmodes)

51def _laplacian_gauss_idw(

52 points: np.ndarray,

53 min_neighbors: Union[int, None] = None,

54 sigma: Union[float, None] = None,

55 search_radius: Union[float, None] = None,

56):

57 """

58 Calculates the laplacian matrix for the sample points of a manifold. The inverse

59 of the gauss-transformed distance is used as weighting of the neighbors.

61 Parameters

62 ----------

63 points: array-like, shape (n_points, n_components) :

64 The sampling points of a manifold.

65 min_neighbors: int

66 The minimum number of neighbors of a point to be considered for the laplacian

67 weights. Can be used to avoid unconnected points.

68 sigma: float

69 The standard deviation of the gaussian normal distribution function used to

70 transform the distances for the inverse distance based weighting.

71 search_radius : float

72 radius search parameter for nearest neighbors

74 Returns

75 -------

76 L: array-like, shape (n_points, n_points)

77 The laplacian matrix for manifold given by its sampling `points`.

78 """

79 assert 2 == points.ndim

81 if min_neighbors is None:

82 min_neighbors = points.shape[1]

84 tree = KDTree(points)

86 if sigma is None:

87 d, _ = tree.query(points, 2 + 2 * points.shape[1], return_distance=True)

88 sigma = np.sum(d[:, -2:])

89 sigma /= 3 * len(points)

91 if search_radius is None:

92 search_radius = 3 * sigma

94 graph = dok_matrix((len(points), len(points)), dtype=np.double)

96 for i, (j, d, e, k) in enumerate(

97 zip(

98 *tree.query_radius(points, return_distance=True, r=search_radius),

99 *tree.query(points, return_distance=True, k=1 + min_neighbors)

100 )

101 ):

102 # Always search for k neighbors, this prevents strongly connected local areas

103 # a little, attracting the eigenfield

104

105 d, j = e, k

106 k = j != i

107 d, j = d[k], j[k]

108 d **= 2

109 d /= -2 * sigma**2

110 graph[i, j] = d = np.exp(d)

111 graph[j, i] = d[:, np.newaxis]

112

113 assert 0 == (graph != graph.T).sum()

114

115 return csgraph.laplacian(graph, normed=True)

116

117

118def _laplacian(lapl: csgraph, n_eigenmodes: int = 5):

119 """

120 Compute the laplacian of a graph L

121

122 Parameters

123 ----------

124 L : csgraph

125 sparse cs graph from scipy

126 n_eigenmodes : int

127 number of eigenmodes to compute

128 points : np.ndarray

129 coordintes of graph nodes (only for plotting)

130

131 Returns

132 -------

133 eigen_values : np.ndarray

134 eingenvalues of the graph

135 eigen_vecs : np.ndarray

136 eigenvectors of each graph vector (iNode x nEigenmodes)

137 """

138

139 n_nonzero_eigenvalues = 0

140 n_eigenvalues = int(n_eigenmodes * 1.5)

141

142 eigen_vals = np.empty((0,))

143 eigen_vecs = np.empty((0, 0))

144

145 while n_nonzero_eigenvalues < n_eigenmodes:

146

147 eigen_vals, eigen_vecs = map(np.real, eigsh(lapl, n_eigenvalues, which="SA"))

148

149 i_start = np.argmax(eigen_vals > 1e-7)

150 n_nonzero_eigenvalues = len(eigen_vals) - i_start

151

152 if n_nonzero_eigenvalues >= n_eigenmodes:

153 eigen_vecs = eigen_vecs[:, i_start : i_start + n_eigenmodes]

154 eigen_vals = eigen_vals[i_start : i_start + n_eigenmodes]

155

156 n_eigenvalues = int(n_eigenvalues * 1.5)

157

158 return eigen_vals, eigen_vecs

Coverage for lasso/dimred/graph_laplacian.py: 0%

47 statements