@@ -1171,11 +1171,13 @@ cdef class StringHashTable(HashTable):
11711171 const char **vecs
11721172 khiter_t k
11731173 bint use_na_value
1174+ bint non_null_na_value
11741175
11751176 if return_inverse:
11761177 labels = np.zeros(n, dtype=np.intp)
11771178 uindexer = np.empty(n, dtype=np.int64)
11781179 use_na_value = na_value is not None
1180+ non_null_na_value = not checknull(na_value)
11791181
11801182 # assign pointers and pre-filter out missing (if ignore_na)
11811183 vecs = <const char **>malloc(n * sizeof(char *))
@@ -1186,7 +1188,12 @@ cdef class StringHashTable(HashTable):
11861188
11871189 if (ignore_na
11881190 and (not isinstance(val, str)
1189- or (use_na_value and val == na_value))):
1191+ or (use_na_value and (
1192+ (non_null_na_value and val == na_value) or
1193+ (not non_null_na_value and is_matching_na(val, na_value)))
1194+ )
1195+ )
1196+ ):
11901197 # if missing values do not count as unique values (i.e. if
11911198 # ignore_na is True), we can skip the actual value, and
11921199 # replace the label with na_sentinel directly
@@ -1452,18 +1459,23 @@ cdef class PyObjectHashTable(HashTable):
14521459 object val
14531460 khiter_t k
14541461 bint use_na_value
1455-
1462+ bint non_null_na_value
14561463 if return_inverse:
14571464 labels = np.empty(n, dtype=np.intp)
14581465 use_na_value = na_value is not None
1466+ non_null_na_value = not checknull(na_value)
14591467
14601468 for i in range(n):
14611469 val = values[i]
14621470 hash(val)
14631471
14641472 if ignore_na and (
14651473 checknull(val)
1466- or (use_na_value and val == na_value)
1474+ or (use_na_value and (
1475+ (non_null_na_value and val == na_value) or
1476+ (not non_null_na_value and is_matching_na(val, na_value))
1477+ )
1478+ )
14671479 ):
14681480 # if missing values do not count as unique values (i.e. if
14691481 # ignore_na is True), skip the hashtable entry for them, and
0 commit comments