Coverage for lasso/math/sampling.py: 59%
22 statements
« prev ^ index » next coverage.py v7.2.4, created at 2023-04-28 18:42 +0100
« prev ^ index » next coverage.py v7.2.4, created at 2023-04-28 18:42 +0100
1import random
2from typing import Union
3import numpy as np
4from sklearn.neighbors import KDTree
def unique_subsamples(start: int, end: int, n_samples: int, seed=None) -> np.ndarray:
    """Retrieve unique subsample indexes

    Parameters
    ----------
    start: int
        starting index of population (inclusive)
    end: int
        ending index of population (exclusive), must satisfy start <= end
    n_samples: int
        number of samples to draw, clamped to the population size
        ``end - start`` if it is larger
    seed: int
        seed for random number generator, None seeds from system entropy

    Returns
    -------
    indexes: np.ndarray
        unique sample indexes drawn from ``range(start, end)`` as int64

    Raises
    ------
    AssertionError
        if start > end
    ValueError
        if n_samples is negative (raised by the underlying sampler)
    """
    # Kept as an assert so callers relying on AssertionError are unaffected.
    assert start <= end, "start must not be greater than end"

    # Cannot draw more unique indexes than the population holds.
    n_samples = min(n_samples, end - start)

    # A private generator produces the same sequence as seeding the module
    # level functions, but leaves the caller's global random state untouched.
    rng = random.Random(seed)
    return np.array(rng.sample(range(start, end), n_samples), dtype=np.int64)
def homogenize_density(
    points: np.ndarray,
    dim: int = 2,
    target_distance: Union[float, None] = None,
    n_neighbors: int = 18,
    seed=None,
) -> np.ndarray:
    """Homogenize a cloud density by probabilities

    Every point gets an average distance to its nearest neighbors. Points
    whose average distance reaches the target distance are always kept,
    denser points are kept with probability
    ``(average_distance / target_distance) ** dim``.

    Parameters
    ----------
    points: np.ndarray
        point cloud
    dim: int
        intrinsic dimension of the data
    target_distance: float
        target distance to aim for, defaults to the median of the average
        neighborhood distances if None
    n_neighbors: int
        neighbors used for computation of average neighborhood distance,
        clamped to ``len(points) - 1``
    seed: int
        seed for random number generator, None seeds from system entropy

    Returns
    -------
    is_selected: np.ndarray
        boolean array indicating which subsamples were selected
    """
    n_points = len(points)

    # With fewer than two points there is no neighbor distance to compare,
    # so nothing can be thinned out: keep everything.
    if n_points < 2:
        return np.ones(n_points, dtype=bool)

    # The query below asks for n_neighbors + 1 hits, which must not exceed
    # the number of points in the tree.
    n_neighbors = min(n_neighbors, n_points - 1)

    # A private generator produces the same sequence as seeding the module
    # level functions, but leaves the caller's global random state untouched.
    rng = random.Random(seed)

    d, _ = KDTree(points).query(points, k=n_neighbors + 1)
    # The tree is queried with its own points, so the first column is the
    # zero distance of each point to itself (or an exact duplicate): skip it.
    d_average = np.average(d[:, 1:], axis=1)

    if target_distance is None:
        target_distance = np.median(d_average)

    # The short-circuit matters: a random number is only drawn for points
    # denser than the target, and the division is never evaluated when
    # target_distance is zero.
    is_selected = np.array(
        [
            dist >= target_distance or rng.random() < (dist / target_distance) ** dim
            for dist in d_average
        ]
    )
    return is_selected