Skip to content

Commit 87f6d86

Browse files
adding write_sam_header and faster to_string
1 parent 4e3960a commit 87f6d86

File tree

2 files changed

+59
-29
lines changed

2 files changed

+59
-29
lines changed

sam_record.cpp

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ using std::ostream;
3434
using std::begin;
3535
using std::end;
3636
using std::ostringstream;
37+
using std::to_string;
3738

3839
// ADS: this is for debugging purposes
3940
string
@@ -73,42 +74,36 @@ format_sam_flags(const uint16_t the_flags) {
7374
// return regex_match(qual, regex("[!-~]+"));
7475
// }
7576

77+
size_t
78+
sam_rec::estimate_line_size() const {
79+
static const size_t all_field_estimates = 100;
80+
return qname.size() + rname.size() + qual.size() + all_field_estimates;
81+
}
82+
7683
// Format this record as a single tab-separated line: the eleven fields
// qname, flags, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual
// in that order, followed by each tag preceded by a tab. No trailing
// newline is appended.
//
// Every piece is appended directly to the reserved buffer. Building the
// line as one long `a + "\t" + b + ...` expression would assemble it in
// temporaries first and make the reserve() pointless.
string
sam_rec::tostring() const {
  string out;
  out.reserve(estimate_line_size());

  out += qname;
  out += '\t';
  out += to_string(flags);
  out += '\t';
  out += rname;
  out += '\t';
  out += to_string(pos);
  out += '\t';
  // mapq is widened to unsigned before formatting, as in the stream version
  out += to_string(static_cast<unsigned>(mapq));
  out += '\t';
  out += cigar;
  out += '\t';
  out += rnext;
  out += '\t';
  out += to_string(pnext);
  out += '\t';
  out += to_string(tlen);
  out += '\t';
  out += seq;
  out += '\t';
  out += qual;

  for (auto it(begin(tags)); it != end(tags); ++it) {
    out += '\t';
    out += *it;
  }

  return out;
}
95103

96104
// Stream insertion for sam_rec: writes exactly the text returned by
// r.tostring() to the_stream and hands the stream back for chaining.
ostream &
operator<<(std::ostream &the_stream, const sam_rec &r) {
  return the_stream << r.tostring();
}
114109

sam_record.hpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <string>
2323
#include <vector>
2424
#include <iostream>
25+
#include <sstream>
26+
#include <iterator>
2527

2628
// from 30 April 2020 SAM documentation
2729
// 1 0x1 template having multiple segments in sequencing
@@ -117,6 +119,7 @@ class sam_rec {
117119
seq(_seq),
118120
qual(_qual) {}
119121
void add_tag(const std::string &the_tag) {tags.push_back(the_tag);}
122+
size_t estimate_line_size() const;
120123
std::string tostring() const;
121124
};
122125

@@ -145,4 +148,36 @@ void
145148
inflate_with_cigar(const sam_rec &sr, std::string &to_inflate,
146149
const char inflation_symbol = 'N');
147150

151+
// Write a SAM header to `out`: one @HD line with the format version, one
// @SQ line per chromosome, and one @PG line describing the program and
// the command line that invoked it.
//
// chrom_names     sequence names; the first and last entries are skipped
//                 (NOTE(review): presumably padding entries added by the
//                 caller -- confirm), so fewer than three names produce
//                 no @SQ lines at all
// chrom_starts    start offsets; entry i+1 minus entry i is reported as
//                 the length of chromosome i, so this must hold at least
//                 as many entries as chrom_names
// program_name    value written as the @PG ID
// program_version value written as the @PG VN
// argc, argv      command line, written space-separated inside CL:"..."
//                 (each argument, including the last, is followed by a space)
// out             destination stream; flushed after the final line
template<typename T>
static void
write_sam_header(const std::vector<std::string> &chrom_names,
                 const std::vector<T> &chrom_starts,
                 const std::string program_name,
                 const std::string program_version,
                 const int argc, const char **argv,
                 std::ostream &out) {
  static const std::string SAM_VERSION = "1.0";

  // sam version
  out << "@HD" << '\t' << "VN:" << SAM_VERSION << '\n';

  // chromosome sizes; guard the subtraction so an empty name list cannot
  // wrap around to a huge unsigned bound and index out of range
  const size_t n_chroms = chrom_names.empty() ? 0 : chrom_names.size() - 1;
  for (size_t i = 1; i < n_chroms; ++i) {
    out << "@SQ" << '\t'
        << "SN:" << chrom_names[i] << '\t'
        << "LN:" << chrom_starts[i+1] - chrom_starts[i] << '\n';
  }

  // program details
  out << "@PG" << '\t'
      << "ID:" << program_name << '\t'
      << "VN:" << program_version << '\t';

  // how the program was run
  std::ostringstream the_command;
  for (int i = 0; i < argc; ++i)
    the_command << argv[i] << " ";
  out << "CL:\"" << the_command.str() << "\"" << std::endl;
}
182+
148183
#endif

0 commit comments

Comments
 (0)