Commit 55ff888

Merge pull request #10 from aje/master
performance improvement sinkhorn lpl1
2 parents bd325a3 + 9df60d4 commit 55ff888

4 files changed: +140 -34 lines

ot/da.py

Lines changed: 15 additions & 31 deletions

@@ -11,10 +11,6 @@
 from .optim import gcg


-def indices(a, func):
-    return [i for (i, val) in enumerate(a) if func(val)]
-
-
 def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
     """
     Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
@@ -46,7 +42,7 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
     labels_a : np.ndarray (ns,)
         labels of samples in the source domain
     b : np.ndarray (nt,)
-        samples in the target domain
+        samples weights in the target domain
     M : np.ndarray (ns,nt)
         loss matrix
     reg : float
@@ -86,40 +82,28 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
     ot.optim.cg : General regularized OT

     """
-    p=0.5
+    p = 0.5
     epsilon = 1e-3

-    # init data
-    Nini = len(a)
-    Nfin = len(b)
-
     indices_labels = []
-    idx_begin = np.min(labels_a)
-    for c in range(idx_begin,np.max(labels_a)+1):
-        idxc = indices(labels_a, lambda x: x==c)
+    classes = np.unique(labels_a)
+    for c in classes:
+        idxc, = np.where(labels_a == c)
         indices_labels.append(idxc)

-    W=np.zeros(M.shape)
+    W = np.zeros(M.shape)

     for cpt in range(numItermax):
         Mreg = M + eta*W
-        transp=sinkhorn(a,b,Mreg,reg,numItermax=numInnerItermax, stopThr=stopInnerThr)
-        # the transport has been computed. Check if classes are really separated
-        W = np.ones((Nini,Nfin))
-        for t in range(Nfin):
-            column = transp[:,t]
-            all_maj = []
-            for c in range(idx_begin,np.max(labels_a)+1):
-                col_c = column[indices_labels[c-idx_begin]]
-                if c!=-1:
-                    maj = p*((sum(col_c)+epsilon)**(p-1))
-                    W[indices_labels[c-idx_begin],t]=maj
-                    all_maj.append(maj)
-
-            # now we majorize the unlabelled by the min of the majorizations
-            # do it only for unlabbled data
-            if idx_begin==-1:
-                W[indices_labels[0],t]=np.min(all_maj)
+        transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
+                          stopThr=stopInnerThr)
+        # the transport has been computed. Check if classes are really
+        # separated
+        W = np.ones(M.shape)
+        for (i, c) in enumerate(classes):
+            majs = np.sum(transp[indices_labels[i]], axis=0)
+            majs = p*((majs+epsilon)**(p-1))
+            W[indices_labels[i]] = majs

     return transp
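Note: the speed-up in ot/da.py comes from dropping the Python loop over every target sample and majorizing each class's rows in one vectorized step. Below is a minimal NumPy sketch of that step; the toy `transp` and `labels_a` are made up for illustration, and the brute-force per-column loop is kept only to check equivalence with what the old code computed.

```python
import numpy as np

# Toy stand-ins for the quantities inside sinkhorn_lpl1_mm (illustrative only).
rng = np.random.RandomState(0)
transp = rng.rand(6, 4)                   # transport plan, shape (ns, nt)
labels_a = np.array([0, 0, 1, 1, 2, 2])   # source labels
p, epsilon = 0.5, 1e-3

classes = np.unique(labels_a)
indices_labels = [np.where(labels_a == c)[0] for c in classes]

# New vectorized majorization: one per-class sum over all target columns.
W_new = np.ones(transp.shape)
for i, c in enumerate(classes):
    majs = np.sum(transp[indices_labels[i]], axis=0)   # class mass per target
    W_new[indices_labels[i]] = p * ((majs + epsilon) ** (p - 1))

# Brute-force per-target-column reference, as the old loop computed it.
W_old = np.ones(transp.shape)
for t in range(transp.shape[1]):
    for i, c in enumerate(classes):
        col_c = transp[indices_labels[i], t]
        W_old[indices_labels[i], t] = p * ((np.sum(col_c) + epsilon) ** (p - 1))

assert np.allclose(W_new, W_old)
```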

ot/gpu/bregman.py

Lines changed: 9 additions & 3 deletions

@@ -8,7 +8,7 @@


 def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
-             log=False):
+             log=False, returnAsGPU=False):
     # init data
     Nini = len(a)
     Nfin = len(b)
@@ -77,7 +77,13 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,

     K_GPU.mult_by_col(u_GPU, target=K_GPU)
     K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
+
+    if returnAsGPU:
+        res = K_GPU
+    else:
+        res = K_GPU.asarray()
+
     if log:
-        return K_GPU.asarray(), log
+        return res, log
     else:
-        return K_GPU.asarray()
+        return res
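Note: the new `returnAsGPU` flag lets callers keep the Sinkhorn plan on the device, which is how the GPU lpl1 solver in ot/gpu/da.py avoids one device-to-host copy per outer iteration. A hedged usage sketch, assuming cudamat and a CUDA device are available (the data and `reg` value are illustrative):

```python
import numpy as np
import cudamat
from ot.gpu.da import pairwiseEuclideanGPU
from ot.gpu.bregman import sinkhorn

cudamat.init()
xs = np.random.rand(100, 3)    # source samples (illustrative)
xt = np.random.rand(120, 3)    # target samples (illustrative)
a = np.ones(100) / 100         # uniform source weights
b = np.ones(120) / 120         # uniform target weights

# Squared-Euclidean cost matrix, computed and kept on the GPU.
M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, squared=True)

# returnAsGPU=True: the plan stays on the device as a cudamat.CUDAMatrix,
# ready for further GPU operations.
transp_GPU = sinkhorn(a, b, M_GPU, reg=1e-1, returnAsGPU=True)

# Same result as the default returnAsGPU=False: one copy back to the host.
transp = transp_GPU.asarray()
```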

ot/gpu/da.py

Lines changed: 88 additions & 0 deletions

@@ -11,6 +11,27 @@


 def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+    """
+    Compute the pairwise euclidean distance between matrices a and b.
+
+
+    Parameters
+    ----------
+    a : np.ndarray (n, f)
+        first matrice
+    b : np.ndarray (m, f)
+        second matrice
+    returnAsGPU : boolean, optional (default False)
+        if True, returns cudamat matrix still on GPU, else return np.ndarray
+    squared : boolean, optional (default False)
+        if True, return squared euclidean distance matrice
+
+
+    Returns
+    -------
+    c : (n x m) np.ndarray or cudamat.CUDAMatrix
+        pairwise euclidean distance distance matrix
+    """
     # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
     # First compute in c_GPU the squared euclidean distance. And return its
     # square root. At each cell [i,j] of c, we want to have
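Note: the comment above is cut off by the hunk boundary; the quantity it describes is the pairwise squared Euclidean distance, which presumably relies on the usual norms-plus-dot-product decomposition. A CPU sketch of that identity with NumPy (not part of the commit):

```python
import numpy as np

# Reference for what pairwiseEuclideanGPU computes, via the identity
# ||a_i - b_j||^2 = ||a_i||^2 + ||b_j||^2 - 2 * <a_i, b_j>.
rng = np.random.RandomState(0)
a = rng.rand(5, 3)    # (n, f)
b = rng.rand(7, 3)    # (m, f)

c_squared = (np.sum(a ** 2, axis=1)[:, None]
             + np.sum(b ** 2, axis=1)[None, :]
             - 2 * a.dot(b.T))

# Brute-force check of the identity.
ref = np.array([[np.sum((a[i] - b[j]) ** 2) for j in range(b.shape[0])]
                for i in range(a.shape[0])])
assert np.allclose(c_squared, ref)

c = np.sqrt(np.maximum(c_squared, 0))   # squared=False case: take the root
```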
@@ -46,6 +67,48 @@
     return c_GPU.asarray()


+def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
+                     numInnerItermax=200, stopInnerThr=1e-9,
+                     verbose=False, log=False):
+    p = 0.5
+    epsilon = 1e-3
+    Nfin = len(b)
+
+    indices_labels = []
+    classes = np.unique(labels_a)
+    for c in classes:
+        idxc, = np.where(labels_a == c)
+        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+
+    Mreg_GPU = cudamat.empty(M_GPU.shape)
+    W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+
+    for cpt in range(numItermax):
+        Mreg_GPU.assign(M_GPU)
+        Mreg_GPU.add_mult(W_GPU, eta)
+        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
+                              stopThr=stopInnerThr, returnAsGPU=True)
+        # the transport has been computed. Check if classes are really
+        # separated
+        W_GPU.assign(1)
+        W_GPU = W_GPU.transpose()
+        for (i, c) in enumerate(classes):
+            (_, nbRow) = indices_labels[i].shape
+            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
+            transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
+            majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
+            cudamat.pow(majs_GPU, (p-1))
+            majs_GPU.mult(p)
+
+            tmpC_GPU.assign(0)
+            tmpC_GPU.add_col_vec(majs_GPU)
+            W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
+
+        W_GPU = W_GPU.transpose()
+
+    return transp_GPU.asarray()
+
+
 class OTDA_GPU(OTDA):
     def normalizeM(self, norm):
         if norm == "median":
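Note: a hedged sketch of calling the GPU `sinkhorn_lpl1_mm` directly, again assuming cudamat and a CUDA device; the `OTDA_lpl1` class in the next hunk wraps exactly this pattern, so the data and parameters here are illustrative.

```python
import numpy as np
import cudamat
from ot.gpu.da import pairwiseEuclideanGPU, sinkhorn_lpl1_mm

cudamat.init()
xs = np.random.rand(200, 10)          # source samples (illustrative)
ys = np.random.randint(0, 5, 200)     # source class labels
xt = np.random.rand(300, 10)          # target samples

ws = np.ones(200) / 200               # uniform source weights
wt = np.ones(300) / 300               # uniform target weights

# The loss matrix must already live on the GPU, as OTDA_lpl1.fit does below.
M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, squared=True)

# Returns the transport plan back on the host as a np.ndarray.
G = sinkhorn_lpl1_mm(ws, ys, wt, M_GPU, reg=1e-1, eta=1.0)
```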
@@ -84,3 +147,28 @@ def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
         self.normalizeM(norm)
         self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
         self.computed = True
+
+
+class OTDA_lpl1(OTDA_GPU):
+    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
+            **kwargs):
+        cudamat.init()
+        xs = np.asarray(xs, dtype=np.float64)
+        xt = np.asarray(xt, dtype=np.float64)
+
+        self.xs = xs
+        self.xt = xt
+
+        if wt is None:
+            wt = unif(xt.shape[0])
+        if ws is None:
+            ws = unif(xs.shape[0])
+
+        self.ws = ws
+        self.wt = wt
+
+        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+                                          squared=True)
+        self.normalizeM(norm)
+        self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
+        self.computed = True

test/test_gpu_sinkhorn_lpl1.py

Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+import ot
+import numpy as np
+import time
+import ot.gpu
+
+
+def describeRes(r):
+    print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
+          .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
+
+for n in [5000, 10000, 15000, 20000]:
+    print(n)
+    a = np.random.rand(n // 4, 100)
+    labels_a = np.random.randint(10, size=(n // 4))
+    b = np.random.rand(n, 100)
+    time1 = time.time()
+    transport = ot.da.OTDA_lpl1()
+    transport.fit(a, labels_a, b)
+    G1 = transport.G
+    time2 = time.time()
+    transport = ot.gpu.da.OTDA_lpl1()
+    transport.fit(a, labels_a, b)
+    G2 = transport.G
+    time3 = time.time()
+    print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(time2 - time1))
+    describeRes(G1)
+    print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format(time3 - time2))
+    describeRes(G2)
