Skip to content

Commit 04e4cce

Browse files
reimplementing dna_four_bit.hpp to pack reads into size_ts instead of uint8_t
1 parent 4eb53d9 commit 04e4cce

File tree

1 file changed

+44
-68
lines changed

1 file changed

+44
-68
lines changed

dna_four_bit.hpp

Lines changed: 44 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -27,40 +27,24 @@ extern char dna_four_bit_decoding[16];
2727

2828
/// Extract one 4-bit field (nibble) from a word of packed 4-bit values.
///
/// @param x       word holding the packed values
/// @param offset  index of the nibble to read; nibble 0 is the least
///                significant 4 bits and nibble k occupies bits [4k, 4k+3]
/// @return the selected nibble, a value in [0, 15], converted to uint_type
///
/// The word is shifted right by 4*offset bits, so the caller must keep
/// offset below the number of nibbles in x: shifting by the full width of
/// the (promoted) operand or more is undefined behaviour.
template <typename uint_type> constexpr
uint_type
get_nibble(const uint_type x, const size_t offset) {
  return (x >> (4*offset)) & 15ul;
}
4733

4834
template <typename uint_type> constexpr
4935
char
5036
decode_dna_four_bit(const uint_type x,
51-
const base_in_byte b = base_in_byte::left) {
52-
return b == base_in_byte::left ?
53-
decode_dna_four_bit_low(x) :
54-
decode_dna_four_bit_high(x);
37+
const size_t offset) {
38+
return dna_four_bit_decoding[get_nibble(x, offset)];
5539
}
5640

5741
template<class InputItr, class OutputIt>
5842
OutputIt
5943
decode_dna_four_bit(InputItr first, InputItr last, OutputIt d_first) {
6044
// ADS: assume destination has enough space
6145
while (first != last) {
62-
*d_first++ = decode_dna_four_bit(*first, base_in_byte::left);
63-
*d_first++ = decode_dna_four_bit(*first, base_in_byte::right);
46+
for (size_t offset = 0; offset < 16; ++offset)
47+
*d_first++ = decode_dna_four_bit(*first, offset);
6448
++first;
6549
}
6650
// if original sequence length is odd and encoding not padded at the front,
@@ -73,95 +57,87 @@ void
7357
decode_dna_four_bit(const InCtr &source, OutCtr &dest) {
7458
// expand out the bytes as pairs (do this backwards in case source == dest)
7559
const size_t source_size = source.size();
76-
dest.resize(2*source_size);
60+
dest.resize(16*source_size);
7761
size_t i = source_size;
7862
size_t j = dest.size();
7963
while (i > 0) {
8064
dest[--j] = source[--i];
8165
dest[--j] = source[i];
8266
}
83-
for (i = 0; i < dest.size(); i += 2) {
84-
dest[i] = decode_dna_four_bit(dest[i], base_in_byte::left);
85-
dest[i+1] = decode_dna_four_bit(dest[i+1], base_in_byte::right);
67+
for (i = 0; i < dest.size(); i += 16) {
68+
for (size_t offset = 0; offset < 16; ++offset)
69+
dest[i + offset] = decode_dna_four_bit(dest[i], offset);
8670
}
8771
}
8872

8973
extern uint8_t dna_four_bit_encoding[128];
90-
9174
template <typename uint_type> constexpr
92-
uint8_t
93-
encode_dna_four_bit_low(const uint_type x) {
94-
return dna_four_bit_encoding[static_cast<unsigned>(x)];
95-
}
96-
97-
template <typename uint_type> constexpr
98-
uint8_t
99-
encode_dna_four_bit_high(const uint_type x) {
100-
return dna_four_bit_encoding[static_cast<unsigned>(x)] << 4;
101-
}
102-
103-
template <typename uint_type> constexpr
104-
uint8_t
75+
size_t
10576
encode_dna_four_bit(const uint_type x,
106-
const base_in_byte b = base_in_byte::left) {
107-
return b == base_in_byte::left ?
108-
encode_dna_four_bit_low(x) :
109-
encode_dna_four_bit_high(x);
77+
const size_t offset) {
78+
return (static_cast<size_t>(
79+
dna_four_bit_encoding[static_cast<unsigned>(x)])
80+
) << (4*offset);
11081
}
11182

11283
/// Encode a range of characters, packing 16 four-bit codes per output word.
///
/// @param first, last  input range of characters to encode
/// @param d_first      start of the destination; one element is written for
///                     every (up to) 16 input characters
/// @return iterator one past the last element written
///
/// Character k of each group of 16 is placed in nibble k of the output
/// word. When the input length is not a multiple of 16 the unused high
/// nibbles of the final word are left as zero.
template<class InputItr, class OutputIt>
OutputIt
encode_dna_four_bit(InputItr first, InputItr last, OutputIt d_first) {
  while (first != last) {
    // assemble the word locally and store it once: the destination is only
    // ever written, never read back, so write-only output iterators work
    size_t packed = 0;
    for (size_t i = 0; i < 16 && first != last; ++i)
      packed |= encode_dna_four_bit(*first++, i);
    *d_first = packed;
    ++d_first;
  }
  return d_first;
}
12394

124-
// GS: intended to be used as pointer to 4-bit encoding of DNA within a vector
// of size_t values
//
// A position is the pair (itr, offset): itr refers to the current word and
// offset is the index of the current 4-bit code inside that word, where
// code k occupies bits [4k, 4k+3].
// NOTE(review): 16 codes per word assumes size_t is 64 bits wide -- confirm
// for every target platform.
struct genome_four_bit_itr {
  enum : int {
    codes_per_word = 16,              // 4-bit codes packed in one word
    offset_mask = codes_per_word - 1  // wraps an offset into [0, 15]
  };

  // off_ selects the code within *itr_; it is stored as given
  genome_four_bit_itr(const std::vector<size_t>::const_iterator itr_,
                      const int off_ = 0) : itr(itr_), offset(off_) {}

  // value (0..15) of the code at the current position
  size_t operator*() const {
    return (*itr >> (offset << 2)) & 15ul;
  }
  genome_four_bit_itr& operator++() {
    offset = (offset + 1) & offset_mask;
    itr += (offset == 0);  // offset wrapped around: step to the next word
    return *this;
  }
  genome_four_bit_itr operator++(int) {
    genome_four_bit_itr tmp(*this);
    ++(*this);
    return tmp;
  }
  genome_four_bit_itr& operator--() {
    itr -= (offset == 0);  // at the first code: step to the previous word
    // adding 15 modulo 16 is the same as subtracting 1, and keeps the
    // arithmetic non-negative (0 becomes 15)
    offset = (offset + offset_mask) & offset_mask;
    return *this;
  }
  genome_four_bit_itr operator--(int) {
    genome_four_bit_itr tmp(*this);
    --(*this);
    return tmp;
  }
  // position reached after advancing by step codes
  genome_four_bit_itr operator+(const size_t step) const {
    // codes to advance within a word, after removing whole words
    const size_t in_word = step & static_cast<size_t>(offset_mask);
    const size_t pos = static_cast<size_t>(offset) + in_word;
    // whether the sum of offsets is >= 16, i.e. spills into the next word
    const bool shift_one_pos = (pos >= static_cast<size_t>(codes_per_word));
    const int new_offset =
      static_cast<int>(pos & static_cast<size_t>(offset_mask));
    return genome_four_bit_itr(itr + step/codes_per_word + shift_one_pos,
                               new_offset);
  }
  bool operator==(const genome_four_bit_itr &rhs) const {
    return itr == rhs.itr && offset == rhs.offset;
  }
  bool operator!=(const genome_four_bit_itr &rhs) const {
    return !(*this == rhs);
  }
  std::vector<size_t>::const_iterator itr;  // current word
  int offset;                               // code index within *itr
};
166142

167143
#endif

0 commit comments

Comments
 (0)