Coverage for lasso/dimred/graph_laplacian.py: 0%
47 statements
« prev ^ index » next coverage.py v7.2.4, created at 2023-04-28 18:42 +0100
« prev ^ index » next coverage.py v7.2.4, created at 2023-04-28 18:42 +0100
1from typing import Union
3import numpy as np
4from scipy.sparse import csgraph, dok_matrix
5from scipy.sparse.linalg import eigsh
6from sklearn.neighbors import KDTree
9def run_graph_laplacian(
10 points: np.ndarray,
11 n_eigenmodes: int = 5,
12 min_neighbors: Union[int, None] = None,
13 sigma: Union[float, None] = None,
14 search_radius: Union[float, None] = None,
15):
16 """
17 Compute a graph laplacian.
19 Parameters
20 ----------
21 points : np.ndarray
22 points with features
23 n_eigenmodes : int
24 number of eigenmodes to compute
25 min_neighbors : int
26 The minimum number of neighbors of a point to be considered for the laplacian
27 weights. Can be used to avoid unconnected points.
28 sigma : float
29 The standard deviation of the gaussian normal distribution function used to
30 transform the distances for the inverse distance based weighting.
31 search_radius:
34 Returns
35 -------
36 eigenvalues : np.ndarray
37 eigenvalues from the graph
38 eigenvectors : np.ndarray
39 eigenvectors with shape (n_points x n_eigenvectors)
40 """
41 with np.warnings.catch_warnings():
42 regex_string = (
43 r"the matrix subclass is not the recommended way to represent"
44 + r"matrices or deal with linear algebra"
45 )
46 np.warnings.filterwarnings("ignore", regex_string)
47 lapl = _laplacian_gauss_idw(points, min_neighbors, sigma, search_radius)
48 return _laplacian(lapl, n_eigenmodes)
51def _laplacian_gauss_idw(
52 points: np.ndarray,
53 min_neighbors: Union[int, None] = None,
54 sigma: Union[float, None] = None,
55 search_radius: Union[float, None] = None,
56):
57 """
58 Calculates the laplacian matrix for the sample points of a manifold. The inverse
59 of the gauss-transformed distance is used as weighting of the neighbors.
61 Parameters
62 ----------
63 points: array-like, shape (n_points, n_components) :
64 The sampling points of a manifold.
65 min_neighbors: int
66 The minimum number of neighbors of a point to be considered for the laplacian
67 weights. Can be used to avoid unconnected points.
68 sigma: float
69 The standard deviation of the gaussian normal distribution function used to
70 transform the distances for the inverse distance based weighting.
71 search_radius : float
72 radius search parameter for nearest neighbors
74 Returns
75 -------
76 L: array-like, shape (n_points, n_points)
77 The laplacian matrix for manifold given by its sampling `points`.
78 """
79 assert 2 == points.ndim
81 if min_neighbors is None:
82 min_neighbors = points.shape[1]
84 tree = KDTree(points)
86 if sigma is None:
87 d, _ = tree.query(points, 2 + 2 * points.shape[1], return_distance=True)
88 sigma = np.sum(d[:, -2:])
89 sigma /= 3 * len(points)
91 if search_radius is None:
92 search_radius = 3 * sigma
94 graph = dok_matrix((len(points), len(points)), dtype=np.double)
96 for i, (j, d, e, k) in enumerate(
97 zip(
98 *tree.query_radius(points, return_distance=True, r=search_radius),
99 *tree.query(points, return_distance=True, k=1 + min_neighbors)
100 )
101 ):
102 # Always search for k neighbors, this prevents strongly connected local areas
103 # a little, attracting the eigenfield
105 d, j = e, k
106 k = j != i
107 d, j = d[k], j[k]
108 d **= 2
109 d /= -2 * sigma**2
110 graph[i, j] = d = np.exp(d)
111 graph[j, i] = d[:, np.newaxis]
113 assert 0 == (graph != graph.T).sum()
115 return csgraph.laplacian(graph, normed=True)
118def _laplacian(lapl: csgraph, n_eigenmodes: int = 5):
119 """
120 Compute the laplacian of a graph L
122 Parameters
123 ----------
124 L : csgraph
125 sparse cs graph from scipy
126 n_eigenmodes : int
127 number of eigenmodes to compute
128 points : np.ndarray
129 coordintes of graph nodes (only for plotting)
131 Returns
132 -------
133 eigen_values : np.ndarray
134 eingenvalues of the graph
135 eigen_vecs : np.ndarray
136 eigenvectors of each graph vector (iNode x nEigenmodes)
137 """
139 n_nonzero_eigenvalues = 0
140 n_eigenvalues = int(n_eigenmodes * 1.5)
142 eigen_vals = np.empty((0,))
143 eigen_vecs = np.empty((0, 0))
145 while n_nonzero_eigenvalues < n_eigenmodes:
147 eigen_vals, eigen_vecs = map(np.real, eigsh(lapl, n_eigenvalues, which="SA"))
149 i_start = np.argmax(eigen_vals > 1e-7)
150 n_nonzero_eigenvalues = len(eigen_vals) - i_start
152 if n_nonzero_eigenvalues >= n_eigenmodes:
153 eigen_vecs = eigen_vecs[:, i_start : i_start + n_eigenmodes]
154 eigen_vals = eigen_vals[i_start : i_start + n_eigenmodes]
156 n_eigenvalues = int(n_eigenvalues * 1.5)
158 return eigen_vals, eigen_vecs