|
123 | 123 | ) |
124 | 124 |
|
125 | 125 |
|
126 | | -class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): |
127 | | - """ |
128 | | - This class manages a MultiIndex by mapping label combinations to positive |
129 | | - integers. |
| 126 | +class MultiIndexUInt64Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): |
| 127 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
| 128 | +
|
| 129 | + The number of possible label combinations must not overflow the 64 bits integers. |
130 | 130 | """ |
131 | 131 |
|
132 | 132 | _base = libindex.UInt64Engine |
| 133 | + _codes_dtype = "uint64" |
133 | 134 |
|
134 | | - def _codes_to_ints(self, codes): |
135 | | - """ |
136 | | - Transform combination(s) of uint64 in one uint64 (each), in a strictly |
137 | | - monotonic way (i.e. respecting the lexicographic order of integer |
138 | | - combinations): see BaseMultiIndexCodesEngine documentation. |
139 | 135 |
|
140 | | - Parameters |
141 | | - ---------- |
142 | | - codes : 1- or 2-dimensional array of dtype uint64 |
143 | | - Combinations of integers (one per row) |
| 136 | +class MultiIndexUInt32Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt32Engine): |
| 137 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
144 | 138 |
|
145 | | - Returns |
146 | | - ------- |
147 | | - scalar or 1-dimensional array, of dtype uint64 |
148 | | - Integer(s) representing one combination (each). |
149 | | - """ |
150 | | - # Shift the representation of each level by the pre-calculated number |
151 | | - # of bits: |
152 | | - codes <<= self.offsets |
| 139 | + The number of possible label combinations must not overflow the 32 bits integers. |
| 140 | + """ |
153 | 141 |
|
154 | | - # Now sum and OR are in fact interchangeable. This is a simple |
155 | | - # composition of the (disjunct) significant bits of each level (i.e. |
156 | | - # each column in "codes") in a single positive integer: |
157 | | - if codes.ndim == 1: |
158 | | - # Single key |
159 | | - return np.bitwise_or.reduce(codes) |
| 142 | + _base = libindex.UInt32Engine |
| 143 | + _codes_dtype = "uint32" |
160 | 144 |
|
161 | | - # Multiple keys |
162 | | - return np.bitwise_or.reduce(codes, axis=1) |
163 | 145 |
|
| 146 | +class MultiIndexUInt16Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt16Engine): |
| 147 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
164 | 148 |
|
165 | | -class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine): |
166 | | - """ |
167 | | - This class manages those (extreme) cases in which the number of possible |
168 | | - label combinations overflows the 64 bits integers, and uses an ObjectEngine |
169 | | - containing Python integers. |
| 149 | + The number of possible label combinations must not overflow the 16 bits integers. |
170 | 150 | """ |
171 | 151 |
|
172 | | - _base = libindex.ObjectEngine |
| 152 | + _base = libindex.UInt16Engine |
| 153 | + _codes_dtype = "uint16" |
173 | 154 |
|
174 | | - def _codes_to_ints(self, codes): |
175 | | - """ |
176 | | - Transform combination(s) of uint64 in one Python integer (each), in a |
177 | | - strictly monotonic way (i.e. respecting the lexicographic order of |
178 | | - integer combinations): see BaseMultiIndexCodesEngine documentation. |
179 | 155 |
|
180 | | - Parameters |
181 | | - ---------- |
182 | | - codes : 1- or 2-dimensional array of dtype uint64 |
183 | | - Combinations of integers (one per row) |
| 156 | +class MultiIndexUInt8Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt8Engine): |
| 157 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
184 | 158 |
|
185 | | - Returns |
186 | | - ------- |
187 | | - int, or 1-dimensional array of dtype object |
188 | | - Integer(s) representing one combination (each). |
189 | | - """ |
190 | | - # Shift the representation of each level by the pre-calculated number |
191 | | - # of bits. Since this can overflow uint64, first make sure we are |
192 | | - # working with Python integers: |
193 | | - codes = codes.astype("object") << self.offsets |
| 159 | + The number of possible label combinations must not overflow the 8 bits integers. |
| 160 | + """ |
194 | 161 |
|
195 | | - # Now sum and OR are in fact interchangeable. This is a simple |
196 | | - # composition of the (disjunct) significant bits of each level (i.e. |
197 | | - # each column in "codes") in a single positive integer (per row): |
198 | | - if codes.ndim == 1: |
199 | | - # Single key |
200 | | - return np.bitwise_or.reduce(codes) |
| 162 | + _base = libindex.UInt8Engine |
| 163 | + _codes_dtype = "uint8" |
201 | 164 |
|
202 | | - # Multiple keys |
203 | | - return np.bitwise_or.reduce(codes, axis=1) |
| 165 | + |
| 166 | +class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine): |
| 167 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
| 168 | +
|
| 169 | + This class manages those (extreme) cases in which the number of possible |
| 170 | + label combinations overflows the 64 bits integers, and uses an ObjectEngine |
| 171 | + containing Python integers. |
| 172 | + """ |
| 173 | + |
| 174 | + _base = libindex.ObjectEngine |
| 175 | + _codes_dtype = "object" |
204 | 176 |
|
205 | 177 |
|
206 | 178 | def names_compat(meth: F) -> F: |
@@ -1229,13 +1201,25 @@ def _engine(self): |
1229 | 1201 | # equivalent to sorting lexicographically the codes themselves. Notice |
1230 | 1202 | # that each level needs to be shifted by the number of bits needed to |
1231 | 1203 | # represent the _previous_ ones: |
1232 | | - offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64") |
| 1204 | + offsets = np.concatenate([lev_bits[1:], [0]]) |
| 1205 | + # Downcast the type if possible, to prevent upcasting when shifting codes: |
| 1206 | + offsets = offsets.astype(np.min_scalar_type(int(offsets[0]))) |
1233 | 1207 |
|
1234 | 1208 | # Check the total number of bits needed for our representation: |
1235 | 1209 | if lev_bits[0] > 64: |
1236 | 1210 | # The levels would overflow a 64 bit uint - use Python integers: |
1237 | 1211 | return MultiIndexPyIntEngine(self.levels, self.codes, offsets) |
1238 | | - return MultiIndexUIntEngine(self.levels, self.codes, offsets) |
| 1212 | + if lev_bits[0] > 32: |
| 1213 | + # The levels would overflow a 32 bit uint - use uint64 |
| 1214 | + return MultiIndexUInt64Engine(self.levels, self.codes, offsets) |
| 1215 | + if lev_bits[0] > 16: |
| 1216 | + # The levels would overflow a 16 bit uint - use uint32 |
| 1217 | + return MultiIndexUInt32Engine(self.levels, self.codes, offsets) |
| 1218 | + if lev_bits[0] > 8: |
| 1219 | + # The levels would overflow an 8 bit uint - use uint16 |
| 1220 | + return MultiIndexUInt16Engine(self.levels, self.codes, offsets) |
| 1221 | + # The levels fit in an 8 bit uint - use uint8 |
| 1222 | + return MultiIndexUInt8Engine(self.levels, self.codes, offsets) |
1239 | 1223 |
|
1240 | 1224 | # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return |
1241 | 1225 | # type "Type[MultiIndex]" in supertype "Index" |
|
0 commit comments