Skip to content

Commit 3e0fa33

Browse files
yhiroseCopilot
andauthored
Implement ETag and Last-Modified support for static file responses and If-Range requests (#2286)
* Fix #2242: Implement ETag and Last-Modified support for static file responses * Add ETag and Last-Modified handling for If-Range requests * Enhance HTTP date parsing with improved error handling and locale support * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update test/test.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Refactor ETag handling: separate strong and weak ETag checks for If-Range requests * Fix type for mtime in FileStat and improve ETag handling comments * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Resolved code review comments * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Refactor ETag handling: use 'auto' for type inference and improve code readability * Refactor ETag handling: extract check_if_not_modified and check_if_range methods for improved readability and maintainability * Code cleanup * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update test/test.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update httplib.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Enhance ETag handling and validation in httplib.h and add comprehensive tests in test.cc * Refactor ETag comparison logic and add test for If-None-Match with non-existent file * Fix #2287 * Code cleanup * Add tests for extreme date values and negative file modification time in ETag handling * Update HTTP-date parsing comments to reference RFC 9110 --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 27b73f0 commit 3e0fa33

File tree

2 files changed

+768
-16
lines changed

2 files changed

+768
-16
lines changed

httplib.h

Lines changed: 233 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,17 +1011,6 @@ using ErrorLogger = std::function<void(const Error &, const Request *)>;
10111011

10121012
using SocketOptions = std::function<void(socket_t sock)>;
10131013

1014-
namespace detail {
1015-
1016-
bool set_socket_opt_impl(socket_t sock, int level, int optname,
1017-
const void *optval, socklen_t optlen);
1018-
bool set_socket_opt(socket_t sock, int level, int optname, int opt);
1019-
bool set_socket_opt_time(socket_t sock, int level, int optname, time_t sec,
1020-
time_t usec);
1021-
int close_socket(socket_t sock);
1022-
1023-
} // namespace detail
1024-
10251014
void default_socket_options(socket_t sock);
10261015

10271016
const char *status_message(int status);
@@ -1102,10 +1091,9 @@ class RegexMatcher final : public MatcherBase {
11021091
std::regex regex_;
11031092
};
11041093

1105-
ssize_t write_headers(Stream &strm, const Headers &headers);
1094+
int close_socket(socket_t sock);
11061095

1107-
std::string make_host_and_port_string(const std::string &host, int port,
1108-
bool is_ssl);
1096+
ssize_t write_headers(Stream &strm, const Headers &headers);
11091097

11101098
} // namespace detail
11111099

@@ -1257,7 +1245,11 @@ class Server {
12571245
bool listen_internal();
12581246

12591247
bool routing(Request &req, Response &res, Stream &strm);
1260-
bool handle_file_request(const Request &req, Response &res);
1248+
bool handle_file_request(Request &req, Response &res);
1249+
bool check_if_not_modified(const Request &req, Response &res,
1250+
const std::string &etag, time_t mtime) const;
1251+
bool check_if_range(Request &req, const std::string &etag,
1252+
time_t mtime) const;
12611253
bool dispatch_request(Request &req, Response &res,
12621254
const Handlers &handlers) const;
12631255
bool dispatch_request_for_content_reader(
@@ -2593,6 +2585,8 @@ struct FileStat {
25932585
FileStat(const std::string &path);
25942586
bool is_file() const;
25952587
bool is_dir() const;
2588+
time_t mtime() const;
2589+
size_t size() const;
25962590

25972591
private:
25982592
#if defined(_WIN32)
@@ -2603,6 +2597,9 @@ struct FileStat {
26032597
int ret_ = -1;
26042598
};
26052599

2600+
std::string make_host_and_port_string(const std::string &host, int port,
2601+
bool is_ssl);
2602+
26062603
std::string trim_copy(const std::string &s);
26072604

26082605
void divide(
@@ -2971,6 +2968,90 @@ inline std::string from_i_to_hex(size_t n) {
29712968
return ret;
29722969
}
29732970

2971+
inline std::string compute_etag(const FileStat &fs) {
2972+
if (!fs.is_file()) { return std::string(); }
2973+
2974+
// If mtime cannot be determined (negative value indicates an error
2975+
// or sentinel), do not generate an ETag. Returning a neutral / fixed
2976+
// value like 0 could collide with a real file that legitimately has
2977+
// mtime == 0 (epoch) and lead to misleading validators.
2978+
auto mtime_raw = fs.mtime();
2979+
if (mtime_raw < 0) { return std::string(); }
2980+
2981+
auto mtime = static_cast<size_t>(mtime_raw);
2982+
auto size = fs.size();
2983+
2984+
return std::string("W/\"") + from_i_to_hex(mtime) + "-" +
2985+
from_i_to_hex(size) + "\"";
2986+
}
2987+
2988+
// Format a time_t as an HTTP-date in the preferred IMF-fixdate form
// (RFC 9110 Section 5.6.7), e.g. "Sun, 06 Nov 1994 08:49:37 GMT".
//
// Returns an empty string when `mtime` is negative or cannot be converted to
// a broken-down UTC time.
//
// The day and month names come from fixed English tables rather than
// strftime's %a / %b: those conversions follow the process-wide C locale, so
// an application that calls setlocale() would otherwise emit localized names,
// which are not valid in an HTTP-date.
inline std::string file_mtime_to_http_date(time_t mtime) {
  if (mtime < 0) { return std::string(); }

  struct tm tm_buf;
#ifdef _WIN32
  if (gmtime_s(&tm_buf, &mtime) != 0) { return std::string(); }
#else
  if (gmtime_r(&mtime, &tm_buf) == nullptr) { return std::string(); }
#endif

  static const char *const day_names[] = {"Sun", "Mon", "Tue", "Wed",
                                          "Thu", "Fri", "Sat"};
  static const char *const month_names[] = {"Jan", "Feb", "Mar", "Apr",
                                            "May", "Jun", "Jul", "Aug",
                                            "Sep", "Oct", "Nov", "Dec"};

  // Defensive: never index the name tables with an out-of-range field.
  if (tm_buf.tm_wday < 0 || tm_buf.tm_wday > 6 || tm_buf.tm_mon < 0 ||
      tm_buf.tm_mon > 11) {
    return std::string();
  }

  // All fields passed here are non-negative and below 100.
  auto append_two_digits = [](std::string &out, int value) {
    out += static_cast<char>('0' + (value / 10) % 10);
    out += static_cast<char>('0' + value % 10);
  };

  // Widen before adding 1900 so an extreme tm_year cannot overflow `int`.
  // `mtime >= 0` implies a year of at least 1970, so no zero padding is
  // needed to reach four digits.
  const auto year = std::to_string(static_cast<long long>(tm_buf.tm_year) + 1900);

  std::string date;
  date.reserve(32);
  date += day_names[tm_buf.tm_wday];
  date += ", ";
  append_two_digits(date, tm_buf.tm_mday);
  date += ' ';
  date += month_names[tm_buf.tm_mon];
  date += ' ';
  date += year;
  date += ' ';
  append_two_digits(date, tm_buf.tm_hour);
  date += ':';
  append_two_digits(date, tm_buf.tm_min);
  date += ':';
  append_two_digits(date, tm_buf.tm_sec);
  date += " GMT";
  return date;
}
3007+
3008+
// Converts an HTTP-date (RFC 9110 Section 5.6.7) into a time_t, interpreting
// the broken-down time as UTC. Returns static_cast<time_t>(-1) when the text
// matches none of the supported layouts.
//
// Layouts are attempted in order and the first one that parses wins. Parsing
// uses the classic "C" locale so day and month names are always English.
inline time_t parse_http_date(const std::string &date_str) {
  const char *const layouts[] = {
      "%a, %d %b %Y %H:%M:%S", // preferred: "Sun, 06 Nov 1994 08:49:37 GMT"
      "%A, %d-%b-%y %H:%M:%S", // RFC 850:   "Sunday, 06-Nov-94 08:49:37 GMT"
      "%a %b %d %H:%M:%S %Y",  // asctime:   "Sun Nov 6 08:49:37 1994"
  };

  const std::locale &c_locale = std::locale::classic();

  for (const char *layout : layouts) {
    // Start every attempt from a zeroed struct so a failed attempt cannot
    // leak partially parsed fields into the next one.
    struct tm parsed;
    std::memset(&parsed, 0, sizeof(parsed));

    std::istringstream input(date_str);
    input.imbue(c_locale);
    input >> std::get_time(&parsed, layout);
    if (input.fail()) { continue; }

#ifdef _WIN32
    return _mkgmtime(&parsed);
#else
    return timegm(&parsed);
#endif
  }

  return static_cast<time_t>(-1);
}
3043+
3044+
// Returns true when `s` has the shape of a weak entity-tag: the `W/` prefix
// followed by a double-quoted opaque tag, e.g. W/"abc" or W/"".
//
// The closing quote is required, mirroring is_strong_etag, so a truncated
// value such as W/"abc is not classified as an ETag at all.
inline bool is_weak_etag(const std::string &s) {
  return s.size() >= 4 && s[0] == 'W' && s[1] == '/' && s[2] == '"' &&
         s.back() == '"';
}
3048+
3049+
// Returns true when `s` has the shape of a strong entity-tag: a string that
// both begins and ends with a double quote. The shortest accepted value is
// the two-character empty tag "".
inline bool is_strong_etag(const std::string &s) {
  if (s.size() < 2) { return false; }
  return s.front() == '"' && s.back() == '"';
}
3054+
29743055
inline size_t to_utf8(int code, char *buff) {
29753056
if (code < 0x0080) {
29763057
buff[0] = static_cast<char>(code & 0x7F);
@@ -3090,6 +3171,15 @@ inline bool FileStat::is_dir() const {
30903171
return ret_ >= 0 && S_ISDIR(st_.st_mode);
30913172
}
30923173

3174+
inline time_t FileStat::mtime() const {
3175+
return ret_ >= 0 ? static_cast<time_t>(st_.st_mtime)
3176+
: static_cast<time_t>(-1);
3177+
}
3178+
3179+
inline size_t FileStat::size() const {
3180+
return ret_ >= 0 ? static_cast<size_t>(st_.st_size) : 0;
3181+
}
3182+
30933183
inline std::string encode_path(const std::string &s) {
30943184
std::string result;
30953185
result.reserve(s.size());
@@ -3345,6 +3435,42 @@ inline void split(const char *b, const char *e, char d, size_t m,
33453435
}
33463436
}
33473437

3438+
inline bool split_find(const char *b, const char *e, char d, size_t m,
3439+
std::function<bool(const char *, const char *)> fn) {
3440+
size_t i = 0;
3441+
size_t beg = 0;
3442+
size_t count = 1;
3443+
3444+
while (e ? (b + i < e) : (b[i] != '\0')) {
3445+
if (b[i] == d && count < m) {
3446+
auto r = trim(b, e, beg, i);
3447+
if (r.first < r.second) {
3448+
auto found = fn(&b[r.first], &b[r.second]);
3449+
if (found) { return true; }
3450+
}
3451+
beg = i + 1;
3452+
count++;
3453+
}
3454+
i++;
3455+
}
3456+
3457+
if (i) {
3458+
auto r = trim(b, e, beg, i);
3459+
if (r.first < r.second) {
3460+
auto found = fn(&b[r.first], &b[r.second]);
3461+
if (found) { return true; }
3462+
}
3463+
}
3464+
3465+
return false;
3466+
}
3467+
3468+
inline bool split_find(const char *b, const char *e, char d,
3469+
std::function<bool(const char *, const char *)> fn) {
3470+
return split_find(b, e, d, (std::numeric_limits<size_t>::max)(),
3471+
std::move(fn));
3472+
}
3473+
33483474
inline stream_line_reader::stream_line_reader(Stream &strm, char *fixed_buffer,
33493475
size_t fixed_buffer_size)
33503476
: strm_(strm), fixed_buffer_(fixed_buffer),
@@ -8256,7 +8382,7 @@ inline bool Server::read_content_core(
82568382
return true;
82578383
}
82588384

8259-
inline bool Server::handle_file_request(const Request &req, Response &res) {
8385+
inline bool Server::handle_file_request(Request &req, Response &res) {
82608386
for (const auto &entry : base_dirs_) {
82618387
// Prefix match
82628388
if (!req.path.compare(0, entry.mount_point.size(), entry.mount_point)) {
@@ -8277,6 +8403,20 @@ inline bool Server::handle_file_request(const Request &req, Response &res) {
82778403
res.set_header(kv.first, kv.second);
82788404
}
82798405

8406+
auto etag = detail::compute_etag(stat);
8407+
if (!etag.empty()) { res.set_header("ETag", etag); }
8408+
8409+
auto mtime = stat.mtime();
8410+
8411+
auto last_modified = detail::file_mtime_to_http_date(mtime);
8412+
if (!last_modified.empty()) {
8413+
res.set_header("Last-Modified", last_modified);
8414+
}
8415+
8416+
if (check_if_not_modified(req, res, etag, mtime)) { return true; }
8417+
8418+
check_if_range(req, etag, mtime);
8419+
82808420
auto mm = std::make_shared<detail::mmap>(path.c_str());
82818421
if (!mm->is_open()) {
82828422
output_error_log(Error::OpenFile, &req);
@@ -8306,6 +8446,79 @@ inline bool Server::handle_file_request(const Request &req, Response &res) {
83068446
return false;
83078447
}
83088448

8449+
inline bool Server::check_if_not_modified(const Request &req, Response &res,
8450+
const std::string &etag,
8451+
time_t mtime) const {
8452+
// Handle conditional GET:
8453+
// 1. If-None-Match takes precedence (RFC 9110 Section 13.1.2)
8454+
// 2. If-Modified-Since is checked only when If-None-Match is absent
8455+
if (req.has_header("If-None-Match")) {
8456+
if (!etag.empty()) {
8457+
auto val = req.get_header_value("If-None-Match");
8458+
8459+
// NOTE: We use exact string matching here. This works correctly
8460+
// because our server always generates weak ETags (W/"..."), and
8461+
// clients typically send back the same ETag they received.
8462+
// RFC 9110 Section 8.8.3.2 allows weak comparison for
8463+
// If-None-Match, where W/"x" and "x" would match, but this
8464+
// simplified implementation requires exact matches.
8465+
auto ret = detail::split_find(val.data(), val.data() + val.size(), ',',
8466+
[&](const char *b, const char *e) {
8467+
return std::equal(b, e, "*") ||
8468+
std::equal(b, e, etag.begin());
8469+
});
8470+
8471+
if (ret) {
8472+
res.status = StatusCode::NotModified_304;
8473+
return true;
8474+
}
8475+
}
8476+
} else if (req.has_header("If-Modified-Since")) {
8477+
auto val = req.get_header_value("If-Modified-Since");
8478+
auto t = detail::parse_http_date(val);
8479+
8480+
if (t != static_cast<time_t>(-1) && mtime <= t) {
8481+
res.status = StatusCode::NotModified_304;
8482+
return true;
8483+
}
8484+
}
8485+
return false;
8486+
}
8487+
8488+
// Applies If-Range (RFC 9110 Section 13.1.5) to a range request.
//
// If-Range is only considered when the request also carries ranges. When its
// validator does not match the file, `req.ranges` is cleared so the full
// content is served and false is returned. In every other case the request
// is left untouched and true is returned.
//
//   etag  - entity-tag of the file, or empty when none could be computed.
//   mtime - modification time of the file.
inline bool Server::check_if_range(Request &req, const std::string &etag,
                                   time_t mtime) const {
  // Nothing to evaluate unless both Range and If-Range are present.
  if (req.ranges.empty() || !req.has_header("If-Range")) { return true; }

  const auto validator = req.get_header_value("If-Range");

  bool matches;
  if (detail::is_strong_etag(validator)) {
    // If-Range requires strong ETag comparison, i.e. an exact match.
    matches = !etag.empty() && validator == etag;
  } else if (detail::is_weak_etag(validator)) {
    // Weak ETags are never valid in If-Range.
    matches = false;
  } else {
    // Anything else is treated as an HTTP-date; an unparsable value fails.
    const auto since = detail::parse_http_date(validator);
    matches = since != static_cast<time_t>(-1) && mtime <= since;
  }

  if (matches) { return true; }

  // Validator doesn't match: ignore Range and serve full content.
  req.ranges.clear();
  return false;
}
8521+
83098522
inline socket_t
83108523
Server::create_server_socket(const std::string &host, int port,
83118524
int socket_flags,
@@ -8573,10 +8786,13 @@ inline void Server::apply_ranges(const Request &req, Response &res,
85738786
res.set_header("Transfer-Encoding", "chunked");
85748787
if (type == detail::EncodingType::Gzip) {
85758788
res.set_header("Content-Encoding", "gzip");
8789+
res.set_header("Vary", "Accept-Encoding");
85768790
} else if (type == detail::EncodingType::Brotli) {
85778791
res.set_header("Content-Encoding", "br");
8792+
res.set_header("Vary", "Accept-Encoding");
85788793
} else if (type == detail::EncodingType::Zstd) {
85798794
res.set_header("Content-Encoding", "zstd");
8795+
res.set_header("Vary", "Accept-Encoding");
85808796
}
85818797
}
85828798
}
@@ -8635,6 +8851,7 @@ inline void Server::apply_ranges(const Request &req, Response &res,
86358851
})) {
86368852
res.body.swap(compressed);
86378853
res.set_header("Content-Encoding", content_encoding);
8854+
res.set_header("Vary", "Accept-Encoding");
86388855
}
86398856
}
86408857
}

0 commit comments

Comments
 (0)