@@ -786,6 +786,82 @@ def lreshape(data, groups, dropna=True, label=None):
786786
787787 return DataFrame (mdata , columns = id_cols + pivot_cols )
788788
def wide_to_long(df, stubnames, i, j):
    """
    Wide panel to long format. Less flexible but more user-friendly than melt.

    Parameters
    ----------
    df : DataFrame
        The wide-format DataFrame
    stubnames : list or str
        A list of stub names (a single stub name may be given as a string).
        The wide format variables are assumed to start with the stub names,
        followed by an integer suffix. The stub names are matched literally
        and the passed list is not modified.
    i : str
        The name of the id variable.
    j : str
        The name of the subobservation variable. It receives the integer
        suffix that follows the stub name in each wide column name.

    Returns
    -------
    DataFrame
        A DataFrame that contains each stub name as a variable, indexed by
        i and j. All remaining columns are carried along as id variables.

    Raises
    ------
    IndexError
        If `stubnames` is empty.
    ValueError
        If the part of a wide column name that follows the stub name cannot
        be converted to an integer.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> np.random.seed(123)
    >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"},
    ...                    "A1980" : {0 : "d", 1 : "e", 2 : "f"},
    ...                    "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7},
    ...                    "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1},
    ...                    "X"     : dict(zip(range(3), np.random.randn(3)))
    ...                   })
    >>> df["id"] = df.index
    >>> df
      A1970 A1980  B1970  B1980         X  id
    0     a     d    2.5    3.2 -1.085631   0
    1     b     e    1.2    1.3  0.997345   1
    2     c     f    0.7    0.1  0.282978   2
    >>> wide_to_long(df, ["A", "B"], i="id", j="year")
                    X  A    B
    id year
    0  1970 -1.085631  a  2.5
    1  1970  0.997345  b  1.2
    2  1970  0.282978  c  0.7
    0  1980 -1.085631  d  3.2
    1  1980  0.997345  e  1.3
    2  1980  0.282978  f  0.1

    Notes
    -----
    All extra variables are treated as extra id variables. This simply uses
    `pandas.melt` under the hood, but is hard-coded to "do the right thing"
    in a typical case.
    """
    import re

    # Work on a private list so the caller's ``stubnames`` is never mutated,
    # and allow a lone stub name to be passed as a plain string.
    if isinstance(stubnames, str):
        stubs = [stubnames]
    else:
        stubs = list(stubnames)

    def get_var_names(frame, regex):
        # Column labels of ``frame`` that match ``regex``, in column order.
        return frame.filter(regex=regex).columns.tolist()

    def melt_stub(frame, stub, id_vars, j):
        # Escape the stub so characters such as "(" or "." match literally.
        varnames = get_var_names(frame, "^" + re.escape(stub))
        newdf = melt(frame, id_vars=id_vars, value_vars=varnames,
                     value_name=stub, var_name=j)
        # Every melted column name starts with the stub, so dropping the
        # first len(stub) characters leaves exactly the suffix.
        newdf[j] = newdf[j].str.slice(len(stub)).astype(int)
        return newdf

    # Id variables are all columns that do not start with any stub name.
    stub_pattern = "|".join(re.escape(stub) for stub in stubs)
    id_vars = get_var_names(df, "^(?!%s)" % stub_pattern)
    if i not in id_vars:
        id_vars += [i]

    newdf = melt_stub(df, stubs[0], id_vars, j)

    # Outer-merge the remaining stubs on the id variables plus j so that
    # every (id, j) combination seen for any stub is kept.
    for stub in stubs[1:]:
        new = melt_stub(df, stub, id_vars, j)
        newdf = newdf.merge(new, how="outer", on=id_vars + [j], copy=False)
    return newdf.set_index([i, j])
789865
790866def convert_dummies (data , cat_variables , prefix_sep = '_' ):
791867 """
0 commit comments