@@ -27,40 +27,24 @@ extern char dna_four_bit_decoding[16];
2727
2828template <typename uint_type> constexpr
2929uint_type
30- get_low_nibble (const uint_type x) {return x & 15u ;}
31-
32- template <typename uint_type> constexpr
33- uint_type
34- get_high_nibble (const uint_type x) {return (x >> 4 ) & 15u ;}
35-
36- template <typename uint_type> constexpr
37- char
38- decode_dna_four_bit_low (const uint_type x) {
39- return dna_four_bit_decoding[get_low_nibble (x)];
40- }
41-
42- template <typename uint_type> constexpr
43- char
44- decode_dna_four_bit_high (const uint_type x) {
45- return dna_four_bit_decoding[get_high_nibble (x)];
30+ get_nibble (const uint_type x, const size_t offset) {
31+ return (x >> (4 *offset)) & 15ul ;
4632}
4733
4834template <typename uint_type> constexpr
4935char
5036decode_dna_four_bit (const uint_type x,
51- const base_in_byte b = base_in_byte::left) {
52- return b == base_in_byte::left ?
53- decode_dna_four_bit_low (x) :
54- decode_dna_four_bit_high (x);
37+ const size_t offset) {
38+ return dna_four_bit_decoding[get_nibble (x, offset)];
5539}
5640
5741template <class InputItr , class OutputIt >
5842OutputIt
5943decode_dna_four_bit (InputItr first, InputItr last, OutputIt d_first) {
6044 // ADS: assume destination has enough space
6145 while (first != last) {
62- *d_first++ = decode_dna_four_bit (*first, base_in_byte::left);
63- *d_first++ = decode_dna_four_bit (*first, base_in_byte::right );
46+ for ( size_t offset = 0 ; offset < 16 ; ++offset)
47+ *d_first++ = decode_dna_four_bit (*first, offset );
6448 ++first;
6549 }
6650 // if original sequence length is odd and encoding not padded at the front,
7357decode_dna_four_bit (const InCtr &source, OutCtr &dest) {
7458 // expand out the bytes as pairs (do this backwards in case source == dest)
7559 const size_t source_size = source.size ();
76- dest.resize (2 *source_size);
60+ dest.resize (16 *source_size);
7761 size_t i = source_size;
7862 size_t j = dest.size ();
7963 while (i > 0 ) {
8064 dest[--j] = source[--i];
8165 dest[--j] = source[i];
8266 }
83- for (i = 0 ; i < dest.size (); i += 2 ) {
84- dest[i] = decode_dna_four_bit (dest[i], base_in_byte::left);
85- dest[i+ 1 ] = decode_dna_four_bit (dest[i+ 1 ], base_in_byte::right );
67+ for (i = 0 ; i < dest.size (); i += 16 ) {
68+ for ( size_t offset = 0 ; offset < 16 ; ++offset)
69+ dest[i + offset ] = decode_dna_four_bit (dest[i], offset );
8670 }
8771}
8872
8973extern uint8_t dna_four_bit_encoding[128 ];
90-
91- template <typename uint_type> constexpr
92- uint8_t
93- encode_dna_four_bit_low (const uint_type x) {
94- return dna_four_bit_encoding[static_cast <unsigned >(x)];
95- }
96-
9774template <typename uint_type> constexpr
98- uint8_t
99- encode_dna_four_bit_high (const uint_type x) {
100- return dna_four_bit_encoding[static_cast <unsigned >(x)] << 4 ;
101- }
102-
103- template <typename uint_type> constexpr
104- uint8_t
75+ size_t
10576encode_dna_four_bit (const uint_type x,
106- const base_in_byte b = base_in_byte::left ) {
107- return b == base_in_byte::left ?
108- encode_dna_four_bit_low (x) :
109- encode_dna_four_bit_high (x );
77+ const size_t offset ) {
78+ return ( static_cast < size_t >(
79+ dna_four_bit_encoding[ static_cast < unsigned > (x)])
80+ ) << ( 4 *offset );
11081}
11182
11283template <class InputItr , class OutputIt >
11384OutputIt
11485encode_dna_four_bit (InputItr first, InputItr last, OutputIt d_first) {
11586 while (first != last) {
116- *d_first = encode_dna_four_bit (*first++, base_in_byte::left) ;
117- *d_first |= ( first == last ? 0 :
118- encode_dna_four_bit (*first++, base_in_byte::right) );
87+ *d_first = 0 ;
88+ for ( size_t i = 0 ; i < 16 && first != last; ++i)
89+ *d_first |= encode_dna_four_bit (*first++, i );
11990 ++d_first;
12091 }
12192 return d_first;
12293}
12394
124- // ADS: indented to be used as pointer to 4-bit encoding of DNA within a vector
125- // of uint8_t values
95+ // GS: intended to be used as pointer to 4-bit encoding of DNA within a vector
96+ // of size_t values
12697struct genome_four_bit_itr {
127- genome_four_bit_itr (const std::vector<uint8_t >::const_iterator itr_,
128- const bool odd_ = false ) : itr(itr_), itr_odd(odd_ ) {}
98+ genome_four_bit_itr (const std::vector<size_t >::const_iterator itr_,
99+ const int off_ = 0 ) : itr(itr_), offset(off_ ) {}
129100
130- uint8_t operator *() const {
131- return (!itr_odd ? *itr : (*itr >> 4 )) & 15 ;
101+ size_t operator *() const {
102+ return (*itr >> (offset << 2 )) & 15ul ;
132103 }
133104 genome_four_bit_itr& operator ++() {
134- itr += itr_odd ;
135- itr_odd ^= 1ul ;
105+ offset = (offset + 1 ) & 15ul ;
106+ itr += (offset == 0 ) ;
136107 return *this ;
137108 }
138109 genome_four_bit_itr operator ++(int ) {
139110 genome_four_bit_itr tmp (*this );
140- itr += itr_odd ;
141- itr_odd ^= 1ul ;
111+ offset = (offset + 1 ) & 15ul ;
112+ itr += (offset == 0 ) ;
142113 return tmp;
143114 }
144115 genome_four_bit_itr& operator --() {
145- itr -= !itr_odd;
146- itr_odd ^= 1ul ;
116+ itr -= (offset == 0 );
117+
118+ // GS: will underflow on 0 but it's ok?
119+ offset = (offset - 1 ) & 15ul ;
147120 return *this ;
148121 }
149122 genome_four_bit_itr operator --(int ) {
150123 genome_four_bit_itr tmp (*this );
151- itr -= !itr_odd ;
152- itr_odd ^= 1ul ;
124+ itr -= (offset == 0 ) ;
125+ offset = (offset - 1 ) & 15ul ;
153126 return tmp;
154127 }
155- genome_four_bit_itr operator +(const size_t offset) const {
156- const size_t offset_odd = offset & 1ul ;
157- return genome_four_bit_itr (itr + offset/2 + (itr_odd & offset_odd),
158- itr_odd != offset_odd);
128+ genome_four_bit_itr operator +(const size_t step) const {
129+ // whether the sum of offsets is >= 16
130+ const bool shift_one_pos =
131+ (((offset + (static_cast <int >(step) & 15 )) & 16 ) >> 4 );
132+
133+ const int new_offset = (offset + step) & 15 ;
134+ return genome_four_bit_itr (itr + step/16 + shift_one_pos,
135+ new_offset);
159136 }
160137 bool operator !=(const genome_four_bit_itr &rhs) const {
161- return itr != rhs.itr || itr_odd != rhs.itr_odd ;
138+ return itr != rhs.itr || offset != rhs.offset ;
162139 }
163- std::vector<uint8_t >::const_iterator itr;
164- size_t itr_odd ;
140+ std::vector<size_t >::const_iterator itr;
141+ int offset ;
165142};
166143
167144#endif
0 commit comments