Commit 55ff888

Merge pull request #10 from aje/master
performance improvement sinkhorn lpl1
2 parents bd325a3 + 9df60d4 commit 55ff888

4 files changed: +140 -34 lines

ot/da.py

Lines changed: 15 additions & 31 deletions

@@ -11,10 +11,6 @@
 from .optim import gcg


-def indices(a, func):
-    return [i for (i, val) in enumerate(a) if func(val)]
-
-
 def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
     """
     Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
@@ -46,7 +42,7 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
     labels_a : np.ndarray (ns,)
         labels of samples in the source domain
     b : np.ndarray (nt,)
-        samples in the target domain
+        samples weights in the target domain
     M : np.ndarray (ns,nt)
         loss matrix
     reg : float
@@ -86,40 +82,28 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
     ot.optim.cg : General regularized OT

     """
-    p=0.5
+    p = 0.5
     epsilon = 1e-3

-    # init data
-    Nini = len(a)
-    Nfin = len(b)
-
     indices_labels = []
-    idx_begin = np.min(labels_a)
-    for c in range(idx_begin,np.max(labels_a)+1):
-        idxc = indices(labels_a, lambda x: x==c)
+    classes = np.unique(labels_a)
+    for c in classes:
+        idxc, = np.where(labels_a == c)
         indices_labels.append(idxc)

-    W=np.zeros(M.shape)
+    W = np.zeros(M.shape)

     for cpt in range(numItermax):
         Mreg = M + eta*W
-        transp=sinkhorn(a,b,Mreg,reg,numItermax=numInnerItermax, stopThr=stopInnerThr)
-        # the transport has been computed. Check if classes are really separated
-        W = np.ones((Nini,Nfin))
-        for t in range(Nfin):
-            column = transp[:,t]
-            all_maj = []
-            for c in range(idx_begin,np.max(labels_a)+1):
-                col_c = column[indices_labels[c-idx_begin]]
-                if c!=-1:
-                    maj = p*((sum(col_c)+epsilon)**(p-1))
-                    W[indices_labels[c-idx_begin],t]=maj
-                    all_maj.append(maj)
-
-            # now we majorize the unlabelled by the min of the majorizations
-            # do it only for unlabbled data
-            if idx_begin==-1:
-                W[indices_labels[0],t]=np.min(all_maj)
+        transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
+                          stopThr=stopInnerThr)
+        # the transport has been computed. Check if classes are really
+        # separated
+        W = np.ones(M.shape)
+        for (i, c) in enumerate(classes):
+            majs = np.sum(transp[indices_labels[i]], axis=0)
+            majs = p*((majs+epsilon)**(p-1))
+            W[indices_labels[i]] = majs

     return transp
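Note: the speed-up in ot/da.py comes from dropping the Python loop over every target sample and majorizing each class's rows in one vectorized step. Below is a minimal NumPy sketch of that step; the toy `transp` and `labels_a` are made up for illustration, and the brute-force per-column loop is kept only to check equivalence with what the old code computed.

```python
import numpy as np

# Toy stand-ins for the quantities inside sinkhorn_lpl1_mm (illustrative only).
rng = np.random.RandomState(0)
transp = rng.rand(6, 4)                   # transport plan, shape (ns, nt)
labels_a = np.array([0, 0, 1, 1, 2, 2])   # source labels
p, epsilon = 0.5, 1e-3

classes = np.unique(labels_a)
indices_labels = [np.where(labels_a == c)[0] for c in classes]

# New vectorized majorization: one per-class sum over all target columns.
W_new = np.ones(transp.shape)
for i, c in enumerate(classes):
    majs = np.sum(transp[indices_labels[i]], axis=0)   # class mass per target
    W_new[indices_labels[i]] = p * ((majs + epsilon) ** (p - 1))

# Brute-force per-target-column reference, as the old loop computed it.
W_old = np.ones(transp.shape)
for t in range(transp.shape[1]):
    for i, c in enumerate(classes):
        col_c = transp[indices_labels[i], t]
        W_old[indices_labels[i], t] = p * ((np.sum(col_c) + epsilon) ** (p - 1))

assert np.allclose(W_new, W_old)
```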

ot/gpu/bregman.py

Lines changed: 9 additions & 3 deletions

@@ -8,7 +8,7 @@


 def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
-             log=False):
+             log=False, returnAsGPU=False):
     # init data
     Nini = len(a)
     Nfin = len(b)
@@ -77,7 +77,13 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,

     K_GPU.mult_by_col(u_GPU, target=K_GPU)
     K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
+
+    if returnAsGPU:
+        res = K_GPU
+    else:
+        res = K_GPU.asarray()
+
     if log:
-        return K_GPU.asarray(), log
+        return res, log
     else:
-        return K_GPU.asarray()
+        return res
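Note: the new `returnAsGPU` flag lets callers keep the Sinkhorn plan on the device, which is how the GPU lpl1 solver in ot/gpu/da.py avoids one device-to-host copy per outer iteration. A hedged usage sketch, assuming cudamat and a CUDA device are available (the data and `reg` value are illustrative):

```python
import numpy as np
import cudamat
from ot.gpu.da import pairwiseEuclideanGPU
from ot.gpu.bregman import sinkhorn

cudamat.init()
xs = np.random.rand(100, 3)    # source samples (illustrative)
xt = np.random.rand(120, 3)    # target samples (illustrative)
a = np.ones(100) / 100         # uniform source weights
b = np.ones(120) / 120         # uniform target weights

# Squared-Euclidean cost matrix, computed and kept on the GPU.
M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, squared=True)

# returnAsGPU=True: the plan stays on the device as a cudamat.CUDAMatrix,
# ready for further GPU operations.
transp_GPU = sinkhorn(a, b, M_GPU, reg=1e-1, returnAsGPU=True)

# Same result as the default returnAsGPU=False: one copy back to the host.
transp = transp_GPU.asarray()
```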

ot/gpu/da.py

Lines changed: 88 additions & 0 deletions

@@ -11,6 +11,27 @@


 def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+    """
+    Compute the pairwise euclidean distance between matrices a and b.
+
+
+    Parameters
+    ----------
+    a : np.ndarray (n, f)
+        first matrice
+    b : np.ndarray (m, f)
+        second matrice
+    returnAsGPU : boolean, optional (default False)
+        if True, returns cudamat matrix still on GPU, else return np.ndarray
+    squared : boolean, optional (default False)
+        if True, return squared euclidean distance matrice
+
+
+    Returns
+    -------
+    c : (n x m) np.ndarray or cudamat.CUDAMatrix
+        pairwise euclidean distance distance matrix
+    """
     # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
     # First compute in c_GPU the squared euclidean distance. And return its
     # square root. At each cell [i,j] of c, we want to have
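Note: the comment above is cut off by the hunk boundary; the quantity it describes is the pairwise squared Euclidean distance, which presumably relies on the usual norms-plus-dot-product decomposition. A CPU sketch of that identity with NumPy (not part of the commit):

```python
import numpy as np

# Reference for what pairwiseEuclideanGPU computes, via the identity
# ||a_i - b_j||^2 = ||a_i||^2 + ||b_j||^2 - 2 * <a_i, b_j>.
rng = np.random.RandomState(0)
a = rng.rand(5, 3)    # (n, f)
b = rng.rand(7, 3)    # (m, f)

c_squared = (np.sum(a ** 2, axis=1)[:, None]
             + np.sum(b ** 2, axis=1)[None, :]
             - 2 * a.dot(b.T))

# Brute-force check of the identity.
ref = np.array([[np.sum((a[i] - b[j]) ** 2) for j in range(b.shape[0])]
                for i in range(a.shape[0])])
assert np.allclose(c_squared, ref)

c = np.sqrt(np.maximum(c_squared, 0))   # squared=False case: take the root
```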
@@ -46,6 +67,48 @@
     return c_GPU.asarray()


+def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
+                     numInnerItermax=200, stopInnerThr=1e-9,
+                     verbose=False, log=False):
+    p = 0.5
+    epsilon = 1e-3
+    Nfin = len(b)
+
+    indices_labels = []
+    classes = np.unique(labels_a)
+    for c in classes:
+        idxc, = np.where(labels_a == c)
+        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+
+    Mreg_GPU = cudamat.empty(M_GPU.shape)
+    W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+
+    for cpt in range(numItermax):
+        Mreg_GPU.assign(M_GPU)
+        Mreg_GPU.add_mult(W_GPU, eta)
+        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
+                              stopThr=stopInnerThr, returnAsGPU=True)
+        # the transport has been computed. Check if classes are really
+        # separated
+        W_GPU.assign(1)
+        W_GPU = W_GPU.transpose()
+        for (i, c) in enumerate(classes):
+            (_, nbRow) = indices_labels[i].shape
+            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
+            transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
+            majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
+            cudamat.pow(majs_GPU, (p-1))
+            majs_GPU.mult(p)
+
+            tmpC_GPU.assign(0)
+            tmpC_GPU.add_col_vec(majs_GPU)
+            W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
+
+        W_GPU = W_GPU.transpose()
+
+    return transp_GPU.asarray()
+
+
 class OTDA_GPU(OTDA):
     def normalizeM(self, norm):
         if norm == "median":
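Note: a hedged sketch of calling the GPU `sinkhorn_lpl1_mm` directly, again assuming cudamat and a CUDA device; the `OTDA_lpl1` class in the next hunk wraps exactly this pattern, so the data and parameters here are illustrative.

```python
import numpy as np
import cudamat
from ot.gpu.da import pairwiseEuclideanGPU, sinkhorn_lpl1_mm

cudamat.init()
xs = np.random.rand(200, 10)          # source samples (illustrative)
ys = np.random.randint(0, 5, 200)     # source class labels
xt = np.random.rand(300, 10)          # target samples

ws = np.ones(200) / 200               # uniform source weights
wt = np.ones(300) / 300               # uniform target weights

# The loss matrix must already live on the GPU, as OTDA_lpl1.fit does below.
M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, squared=True)

# Returns the transport plan back on the host as a np.ndarray.
G = sinkhorn_lpl1_mm(ws, ys, wt, M_GPU, reg=1e-1, eta=1.0)
```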
@@ -84,3 +147,28 @@ def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
         self.normalizeM(norm)
         self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
         self.computed = True
+
+
+class OTDA_lpl1(OTDA_GPU):
+    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
+            **kwargs):
+        cudamat.init()
+        xs = np.asarray(xs, dtype=np.float64)
+        xt = np.asarray(xt, dtype=np.float64)
+
+        self.xs = xs
+        self.xt = xt
+
+        if wt is None:
+            wt = unif(xt.shape[0])
+        if ws is None:
+            ws = unif(xs.shape[0])
+
+        self.ws = ws
+        self.wt = wt
+
+        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+                                          squared=True)
+        self.normalizeM(norm)
+        self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
+        self.computed = True

test/test_gpu_sinkhorn_lpl1.py

Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+import ot
+import numpy as np
+import time
+import ot.gpu
+
+
+def describeRes(r):
+    print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
+          .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
+
+for n in [5000, 10000, 15000, 20000]:
+    print(n)
+    a = np.random.rand(n // 4, 100)
+    labels_a = np.random.randint(10, size=(n // 4))
+    b = np.random.rand(n, 100)
+    time1 = time.time()
+    transport = ot.da.OTDA_lpl1()
+    transport.fit(a, labels_a, b)
+    G1 = transport.G
+    time2 = time.time()
+    transport = ot.gpu.da.OTDA_lpl1()
+    transport.fit(a, labels_a, b)
+    G2 = transport.G
+    time3 = time.time()
+    print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(time2 - time1))
+    describeRes(G1)
+    print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format(time3 - time2))
+    describeRes(G2)
