|
11 | 11 |
|
12 | 12 |
|
13 | 13 | def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): |
| 14 | + """ |
| 15 | + Compute the pairwise euclidean distance between matrices a and b. |
| 16 | +
|
| 17 | +
|
| 18 | + Parameters |
| 19 | + ---------- |
| 20 | + a : np.ndarray (n, f) |
| 21 | + first matrix |
| 22 | + b : np.ndarray (m, f) |
| 23 | + second matrix |
| 24 | + returnAsGPU : boolean, optional (default False) |
| 25 | + if True, returns cudamat matrix still on GPU, else return np.ndarray |
| 26 | + squared : boolean, optional (default False) |
| 27 | + if True, return squared euclidean distance matrix |
| 28 | +
|
| 29 | +
|
| 30 | + Returns |
| 31 | + ------- |
| 32 | + c : (n x m) np.ndarray or cudamat.CUDAMatrix |
| 33 | + pairwise euclidean distance matrix |
| 34 | + """ |
14 | 35 | # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). |
15 | 36 | # First compute in c_GPU the squared euclidean distance. And return its |
16 | 37 | # square root. At each cell [i,j] of c, we want to have |
@@ -46,6 +67,69 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): |
46 | 67 | return c_GPU.asarray() |
47 | 68 |
|
48 | 69 |
|
def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
                     numInnerItermax=200, stopInnerThr=1e-9,
                     unlabelledValue=-99, verbose=False, log=False):
    """
    Iteratively re-solve a Sinkhorn problem with a class-dependent penalty.

    At each outer iteration the cost ``M_GPU + eta * W`` is handed to
    ``sinkhorn`` and the penalty ``W`` is rebuilt from the resulting
    transport: for every labelled class, the transport entries belonging to
    that class are summed per target sample, and the penalty for that class
    and target is ``p * (sum + epsilon) ** (p - 1)`` with ``p = 0.5`` and
    ``epsilon = 1e-3``. Samples carrying ``unlabelledValue`` receive the
    minimum of the labelled classes' penalties.

    Parameters
    ----------
    a : array-like
        first argument forwarded to ``sinkhorn`` (presumably the source
        sample weights, one per row of ``M_GPU`` -- TODO confirm)
    labels_a : array-like
        label of each source sample; converted with ``np.asarray`` so plain
        lists and tuples are accepted
    b : array-like
        second argument forwarded to ``sinkhorn`` (presumably the target
        sample weights); ``len(b)`` sizes the temporary GPU buffers
    M_GPU : cudamat.CUDAMatrix
        cost matrix already on the GPU; it is not modified
    reg : float
        regularization value forwarded to ``sinkhorn``
    eta : float, optional (default 0.1)
        multiplier applied to the penalty ``W`` before adding it to the cost
    numItermax : int, optional (default 10)
        number of outer iterations; must be at least 1
    numInnerItermax : int, optional (default 200)
        forwarded to ``sinkhorn`` as ``numItermax``
    stopInnerThr : float, optional (default 1e-9)
        forwarded to ``sinkhorn`` as ``stopThr``
    unlabelledValue : int, optional (default -99)
        value in ``labels_a`` marking unlabelled samples
    verbose : boolean, optional (default False)
        accepted for signature compatibility; currently unused
    log : boolean, optional (default False)
        accepted for signature compatibility; currently unused

    Returns
    -------
    transp : np.ndarray
        transport computed by the last ``sinkhorn`` call, copied to the host

    Raises
    ------
    ValueError
        if ``numItermax`` is smaller than 1 (no transport would be computed)
    """
    if numItermax < 1:
        # Without at least one outer iteration there is nothing to return.
        raise ValueError("numItermax must be at least 1")

    p = 0.5
    epsilon = 1e-3

    # init data
    Nfin = len(b)

    # A plain list compared to a scalar with == yields a single bool instead
    # of an elementwise mask, which would make every index set below empty.
    labels_a = np.asarray(labels_a)

    # One (1, count) GPU index matrix per distinct label value.
    indices_labels = []
    classes = np.unique(labels_a)
    for c in classes:
        idxc, = np.where(labels_a == c)
        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))

    Mreg_GPU = cudamat.empty(M_GPU.shape)
    W_GPU = cudamat.empty(M_GPU.shape).assign(0)

    for _ in range(numItermax):
        # Penalized cost for this iteration: M + eta * W.
        Mreg_GPU.assign(M_GPU)
        Mreg_GPU.add_mult(W_GPU, eta)
        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
                              stopThr=stopInnerThr, returnAsGPU=True)
        # the transport has been computed. Check if classes are really
        # separated
        W_GPU.assign(1)
        # Work on the transpose so that each class is a set of columns.
        W_GPU = W_GPU.transpose()
        all_majs_GPU = []
        idx_unlabelled = -1
        for (i, c) in enumerate(classes):
            if c != unlabelledValue:
                (_, nbSamples) = indices_labels[i].shape
                tmpC_GPU = cudamat.empty((Nfin, nbSamples)).assign(0)
                transp_GPU.transpose().select_columns(indices_labels[i],
                                                      tmpC_GPU)
                # majs = p * (per-target sum over the class + epsilon)^(p-1)
                majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
                cudamat.pow(majs_GPU, (p-1))
                majs_GPU.mult(p)
                all_majs_GPU.append(majs_GPU)

                # Replicate majs over every column of the class.
                tmpC_GPU.assign(0)
                tmpC_GPU.add_col_vec(majs_GPU)
                W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
            else:
                idx_unlabelled = i

        # now we majorize the unlabelled (if there are any) by the min of
        # the majorizations. do it only for unlabelled data
        if idx_unlabelled != -1:
            all_majs = np.array([m_GPU.asarray() for m_GPU in all_majs_GPU])
            minMaj_GPU = (cudamat.CUDAMatrix(all_majs).min(axis=0)
                          .transpose())
            (_, nbSamples) = indices_labels[idx_unlabelled].shape
            tmpC_GPU = cudamat.empty((Nfin, nbSamples)).assign(0)

            tmpC_GPU.add_col_vec(minMaj_GPU)
            W_GPU.set_selected_columns(indices_labels[idx_unlabelled],
                                       tmpC_GPU)
        # Back to the orientation of M_GPU for the next iteration.
        W_GPU = W_GPU.transpose()

    return transp_GPU.asarray()
| 131 | + |
| 132 | + |
49 | 133 | class OTDA_GPU(OTDA): |
50 | 134 | def normalizeM(self, norm): |
51 | 135 | if norm == "median": |
@@ -84,3 +168,28 @@ def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): |
84 | 168 | self.normalizeM(norm) |
85 | 169 | self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) |
86 | 170 | self.computed = True |
| 171 | + |
| 172 | + |
class OTDA_lpl1(OTDA_GPU):
    """GPU domain-adaptation estimator whose coupling comes from
    ``sinkhorn_lpl1_mm``, using the source labels ``ys``."""

    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
            **kwargs):
        """
        Compute and store the coupling between ``xs`` and ``xt``.

        Parameters
        ----------
        xs : array-like
            source samples, converted to a float64 np.ndarray
        ys : array-like
            source labels, passed as-is to ``sinkhorn_lpl1_mm``
        xt : array-like
            target samples, converted to a float64 np.ndarray
        reg : float, optional (default 1)
            forwarded to ``sinkhorn_lpl1_mm``
        eta : float, optional (default 1)
            forwarded to ``sinkhorn_lpl1_mm``
        ws : array-like, optional
            source weights; ``unif(xs.shape[0])`` when None
        wt : array-like, optional
            target weights; ``unif(xt.shape[0])`` when None
        norm : optional
            forwarded to ``self.normalizeM``
        **kwargs
            extra keyword arguments forwarded to ``sinkhorn_lpl1_mm``
        """
        cudamat.init()

        source = np.asarray(xs, dtype=np.float64)
        target = np.asarray(xt, dtype=np.float64)
        self.xs = source
        self.xt = target

        # Fall back to unif(...) weights when the caller supplies none.
        target_weights = unif(target.shape[0]) if wt is None else wt
        source_weights = unif(source.shape[0]) if ws is None else ws
        self.ws = source_weights
        self.wt = target_weights

        # Squared euclidean cost, kept on the GPU for the solver.
        self.M_GPU = pairwiseEuclideanGPU(
            source, target, returnAsGPU=True, squared=True)
        self.normalizeM(norm)

        self.G = sinkhorn_lpl1_mm(
            source_weights, ys, target_weights, self.M_GPU, reg, eta,
            **kwargs)
        self.computed = True
0 commit comments