diff --git a/Cargo.lock b/Cargo.lock index 0e8170576..a626cc419 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2068,6 +2068,7 @@ dependencies = [ "kuchikiki", "log", "lol_html", + "md5", "mime", "mime_guess", "mockito", @@ -2079,6 +2080,8 @@ dependencies = [ "opentelemetry_sdk", "path-slash", "percent-encoding", + "phf 0.13.1", + "phf_codegen 0.13.1", "pretty_assertions", "prometheus", "rand 0.9.2", @@ -5413,6 +5416,12 @@ dependencies = [ "digest", ] +[[package]] +name = "md5" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0" + [[package]] name = "memchr" version = "2.7.6" @@ -6140,6 +6149,16 @@ dependencies = [ "phf_shared 0.11.3", ] +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared 0.13.1", + "serde", +] + [[package]] name = "phf_codegen" version = "0.8.0" @@ -6170,6 +6189,16 @@ dependencies = [ "phf_shared 0.11.3", ] +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator 0.13.1", + "phf_shared 0.13.1", +] + [[package]] name = "phf_generator" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 98fd0e803..09594a23c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,7 @@ fn-error-context = "0.2.0" # Templating askama = "0.14.0" walkdir = "2" +phf = "0.13.1" # Date and Time utilities chrono = { version = "0.4.11", default-features = false, features = ["clock", "serde"] } @@ -110,6 +111,7 @@ chrono = { version = "0.4.11", default-features = false, features = ["clock", "s thread_local = "1.1.3" constant_time_eq = "0.4.2" fastly-api = "12.0.0" +md5 = "0.8.0" [dev-dependencies] criterion = "0.7.0" @@ -132,8 +134,10 @@ debug = "line-tables-only" [build-dependencies] time = "0.3" +md5 = "0.8.0" gix = { version = "0.74.0", default-features = false } string_cache_codegen = "0.6.1" +phf_codegen = "0.13" walkdir = "2" anyhow = { version = "1.0.42", features = ["backtrace"] } grass = { version = "0.13.1", default-features = false } diff --git a/build.rs b/build.rs index 7c7548e9c..dfd2156e9 100644 --- a/build.rs +++ b/build.rs @@ -1,5 +1,5 @@ use anyhow::{Context as _, Error, Result}; -use std::{env, path::Path}; +use std::{env, fs::File, io::Write as _, path::Path}; mod tracked { use std::{ @@ -68,13 +68,27 @@ mod tracked { } } +type ETagMap<'a> = phf_codegen::Map<'a, String>; + fn main() -> Result<()> { let out_dir = env::var("OUT_DIR").context("missing OUT_DIR")?; let out_dir = Path::new(&out_dir); read_git_version()?; - compile_sass(out_dir)?; + + let mut etag_map: ETagMap = ETagMap::new(); + + compile_sass(out_dir, &mut etag_map)?; write_known_targets(out_dir)?; compile_syntax(out_dir).context("could not compile syntax files")?; + calculate_static_etags(&mut etag_map)?; + + let mut etag_file = File::create(out_dir.join("static_etag_map.rs"))?; + writeln!( + &mut etag_file, + "pub static STATIC_ETAG_MAP: ::phf::Map<&'static str, &'static str> = {};", + etag_map.build() + )?; + etag_file.sync_all()?; // trigger recompilation when a new migration is added println!("cargo:rerun-if-changed=migrations"); @@ -118,6 +132,16 @@ fn get_git_hash() -> Result> { } } +fn etag_from_path(path: impl AsRef) -> Result { + Ok(etag_from_content(std::fs::read(&path)?)) +} + +fn etag_from_content(content: impl AsRef<[u8]>) -> String { + let digest = md5::compute(content); + let md5_hex = format!("{:x}", digest); + format!(r#""\"{md5_hex}\"""#) +} + fn compile_sass_file(src: &Path, dest: &Path) -> Result<()> { let css = grass::from_path( src.to_str() @@ -133,7 +157,7 @@ fn compile_sass_file(src: &Path, dest: &Path) -> Result<()> { Ok(()) } -fn compile_sass(out_dir: &Path) -> Result<()> { +fn compile_sass(out_dir: &Path, etag_map: &mut ETagMap) -> Result<()> { const STYLE_DIR: &str = "templates/style"; for entry in walkdir::WalkDir::new(STYLE_DIR) { @@ -146,12 +170,13 @@ fn compile_sass(out_dir: &Path) -> Result<()> { .to_str() .context("file name must be a utf-8 string")?; if !file_name.starts_with('_') { - let dest = out_dir - .join(entry.path().strip_prefix(STYLE_DIR)?) - .with_extension("css"); + let dest = out_dir.join(file_name).with_extension("css"); compile_sass_file(entry.path(), &dest).with_context(|| { format!("compiling {} to {}", entry.path().display(), dest.display()) })?; + + let dest_str = dest.file_name().unwrap().to_str().unwrap().to_owned(); + etag_map.entry(dest_str, etag_from_path(&dest)?); } } } @@ -160,7 +185,32 @@ fn compile_sass(out_dir: &Path) -> Result<()> { let pure = tracked::read_to_string("vendor/pure-css/css/pure-min.css")?; let grids = tracked::read_to_string("vendor/pure-css/css/grids-responsive-min.css")?; let vendored = pure + &grids; - std::fs::write(out_dir.join("vendored").with_extension("css"), vendored)?; + std::fs::write(out_dir.join("vendored").with_extension("css"), &vendored)?; + + etag_map.entry( + "vendored.css".to_owned(), + etag_from_content(vendored.as_bytes()), + ); + + Ok(()) +} + +fn calculate_static_etags(etag_map: &mut ETagMap) -> Result<()> { + const STATIC_DIRS: &[&str] = &["static", "vendor"]; + + for static_dir in STATIC_DIRS { + for entry in walkdir::WalkDir::new(static_dir) { + let entry = entry?; + let path = entry.path(); + if !path.is_file() { + continue; + } + + let partial_path = path.strip_prefix(static_dir).unwrap(); + let partial_path_str = partial_path.to_string_lossy().to_string(); + etag_map.entry(partial_path_str, etag_from_path(path)?); + } + } Ok(()) } diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index 42a820c47..1cf4cd981 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -73,6 +73,7 @@ COPY src src/ RUN find src -name "*.rs" -exec touch {} \; COPY templates templates/ COPY vendor vendor/ +COPY static static/ COPY assets assets/ COPY .sqlx .sqlx/ COPY migrations migrations/ diff --git a/src/db/mimes.rs b/src/db/mimes.rs index 5917a5f0e..d59d25b03 100644 --- a/src/db/mimes.rs +++ b/src/db/mimes.rs @@ -10,6 +10,10 @@ macro_rules! mime { mime!(APPLICATION_ZIP, "application/zip"); mime!(APPLICATION_ZSTD, "application/zstd"); mime!(APPLICATION_GZIP, "application/gzip"); +mime!( + APPLICATION_OPENSEARCH_XML, + "application/opensearchdescription+xml" +); mime!(APPLICATION_XML, "application/xml"); mime!(TEXT_MARKDOWN, "text/markdown"); mime!(TEXT_RUST, "text/rust"); diff --git a/src/web/headers/if_none_match.rs b/src/web/headers/if_none_match.rs new file mode 100644 index 000000000..67983a504 --- /dev/null +++ b/src/web/headers/if_none_match.rs @@ -0,0 +1,173 @@ +//! Adapted version of `headers::IfNoneMatch`. +//! +//! The combination of `TypedHeader` and `IfNoneMatch` works in odd ways. +//! They are built in a way that a _missing_ `If-None-Match` header will lead to: +//! +//! 1. extractor with `TypedHeader` returning `IfNoneMatch("")` +//! 2. extractor with `Option>` returning `Some(IfNoneMatch(""))` +//! +//! Where I would expect: +//! 1. a failure because of the missing header +//! 2. `None` for the missing header +//! +//! This could be solved by either adapting `TypedHeader` or `IfNoneMatch`, I'm not sure which is +//! right. +//! +//! Some reading material for those interested: +//! * https://github.com/hyperium/headers/issues/204 +//! * https://github.com/hyperium/headers/pull/165 +//! * https://github.com/tokio-rs/axum/issues/1781 +//! * https://github.com/tokio-rs/axum/pull/1810 +//! * https://github.com/tokio-rs/axum/pull/2475 +//! +//! Right now I feel like adapting `IfNoneMatch` is the "most correct-ish" option. + +#[allow(clippy::disallowed_types)] +mod header_impl { + use axum_extra::headers::{self, ETag, Header, IfNoneMatch as OriginalIfNoneMatch}; + use derive_more::Deref; + + #[derive(Debug, Clone, PartialEq, Deref)] + pub(crate) struct IfNoneMatch(pub axum_extra::headers::IfNoneMatch); + + impl Header for IfNoneMatch { + fn name() -> &'static http::HeaderName { + OriginalIfNoneMatch::name() + } + + fn decode<'i, I>(values: &mut I) -> Result + where + Self: Sized, + I: Iterator, + { + let mut values = values.peekable(); + + // NOTE: this is the difference to the original implementation. + // When there is no header in the request, I want the decoding to fail. + // This makes Option> return `None`, and also matches + // most other header implementations. + if values.peek().is_none() { + Err(headers::Error::invalid()) + } else { + OriginalIfNoneMatch::decode(&mut values).map(IfNoneMatch) + } + } + + fn encode>(&self, values: &mut E) { + self.0.encode(values) + } + } + + impl From for IfNoneMatch { + fn from(value: ETag) -> Self { + Self(value.into()) + } + } +} + +pub(crate) use header_impl::IfNoneMatch; + +#[cfg(test)] +mod tests { + use super::*; + use anyhow::Result; + use axum::{RequestPartsExt, body::Body, extract::Request}; + use axum_extra::{ + TypedHeader, + headers::{ETag, HeaderMapExt as _}, + }; + use http::{HeaderMap, request}; + + fn parts(if_none_match: Option) -> request::Parts { + let mut builder = Request::builder(); + + if let Some(if_none_match) = if_none_match { + let headers = builder.headers_mut().unwrap(); + headers.typed_insert(if_none_match.clone()); + } + + let (parts, _body) = builder.uri("/").body(Body::empty()).unwrap().into_parts(); + + parts + } + + fn example_header() -> IfNoneMatch { + IfNoneMatch::from("\"some-etag-value\"".parse::().unwrap()) + } + + #[test] + fn test_normal_typed_get_with_empty_headers() { + let map = HeaderMap::new(); + assert!(map.typed_get::().is_none()); + assert!(map.typed_try_get::().unwrap().is_none()); + } + + #[test] + fn test_normal_typed_get_with_value_headers() -> Result<()> { + let if_none_match = example_header(); + + let mut map = HeaderMap::new(); + map.typed_insert(if_none_match.clone()); + + assert_eq!(map.typed_get::(), Some(if_none_match.clone())); + assert_eq!(map.typed_try_get::()?, Some(if_none_match)); + + Ok(()) + } + + #[tokio::test] + async fn test_extract_from_empty_request_via_optional_typed_header() -> Result<()> { + let mut parts = parts(None); + + assert!( + parts + .extract::>>() + .await? + // this is what we want, and the default `headers::IfNoneMatch` header can't + // offer. Or the impl of the `TypedHeader` extractor, depending on + // interpretation. + .is_none() + ); + + Ok(()) + } + + #[tokio::test] + async fn test_extract_from_empty_request_via_mandatory_typed_header() -> Result<()> { + let mut parts = parts(None); + + // mandatory extractor leads to error when the header is missing. + assert!(parts.extract::>().await.is_err()); + + Ok(()) + } + + #[tokio::test] + async fn test_extract_from_header_via_optional_typed_header() -> Result<()> { + let if_none_match = example_header(); + let mut parts = parts(Some(if_none_match.clone())); + + assert_eq!( + parts + .extract::>>() + .await? + .map(|th| th.0), + Some(if_none_match) + ); + + Ok(()) + } + + #[tokio::test] + async fn test_extract_from_header_via_mandatory_typed_header() -> Result<()> { + let if_none_match = example_header(); + let mut parts = parts(Some(if_none_match.clone())); + + assert_eq!( + parts.extract::>().await?.0, + if_none_match + ); + + Ok(()) + } +} diff --git a/src/web/headers/mod.rs b/src/web/headers/mod.rs index e89abd1da..7c1eef31b 100644 --- a/src/web/headers/mod.rs +++ b/src/web/headers/mod.rs @@ -1,10 +1,21 @@ mod canonical_url; +mod if_none_match; mod surrogate_key; pub use canonical_url::CanonicalUrl; use http::HeaderName; +pub(crate) use if_none_match::IfNoneMatch; pub use surrogate_key::{SURROGATE_KEY, SurrogateKey, SurrogateKeys}; /// Fastly's Surrogate-Control header /// https://www.fastly.com/documentation/reference/http/http-headers/Surrogate-Control/ pub static SURROGATE_CONTROL: HeaderName = HeaderName::from_static("surrogate-control"); + +/// compute our etag header value from some content +/// +/// Has to match the implementation in our build-script. +#[cfg(test)] +pub fn compute_etag>(content: T) -> axum_extra::headers::ETag { + let digest = md5::compute(&content); + format!("\"{:x}\"", digest).parse().unwrap() +} diff --git a/src/web/statics.rs b/src/web/statics.rs index 99266634e..9acc706ac 100644 --- a/src/web/statics.rs +++ b/src/web/statics.rs @@ -1,14 +1,19 @@ -use super::{cache::CachePolicy, metrics::request_recorder, routes::get_static}; +use super::{ + cache::CachePolicy, headers::IfNoneMatch, metrics::request_recorder, routes::get_static, +}; +use crate::db::mimes::APPLICATION_OPENSEARCH_XML; use axum::{ Router as AxumRouter, extract::{Extension, Request}, - http::header::CONTENT_TYPE, - middleware, - middleware::Next, + middleware::{self, Next}, response::{IntoResponse, Response}, routing::get_service, }; -use axum_extra::headers::HeaderValue; +use axum_extra::{ + headers::{ContentType, ETag, HeaderMapExt as _}, + typed_header::TypedHeader, +}; +use http::{StatusCode, Uri}; use tower_http::services::ServeDir; const VENDORED_CSS: &str = include_str!(concat!(env!("OUT_DIR"), "/vendored.css")); @@ -19,10 +24,14 @@ const RUSTDOC_2021_12_05_CSS: &str = const RUSTDOC_2025_08_20_CSS: &str = include_str!(concat!(env!("OUT_DIR"), "/rustdoc-2025-08-20.css")); +const STATIC_CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser; + +include!(concat!(env!("OUT_DIR"), "/static_etag_map.rs")); + fn build_static_css_response(content: &'static str) -> impl IntoResponse { ( - Extension(CachePolicy::ForeverInCdnAndBrowser), - [(CONTENT_TYPE, mime::TEXT_CSS.as_ref())], + Extension(STATIC_CACHE_POLICY), + TypedHeader(ContentType::from(mime::TEXT_CSS)), content, ) } @@ -34,23 +43,60 @@ async fn set_needed_static_headers(req: Request, next: Next) -> Response { let mut response = next.run(req).await; if response.status().is_success() { - response - .extensions_mut() - .insert(CachePolicy::ForeverInCdnAndBrowser); + response.extensions_mut().insert(STATIC_CACHE_POLICY); } if is_opensearch_xml { // overwrite the content type for opensearch.xml, // otherwise mime-guess would return `text/xml`. - response.headers_mut().insert( - CONTENT_TYPE, - HeaderValue::from_static("application/opensearchdescription+xml"), - ); + response + .headers_mut() + .typed_insert(ContentType::from(APPLICATION_OPENSEARCH_XML.clone())); } response } +async fn conditional_get( + partial_uri: Uri, + if_none_match: Option>, + req: Request, + next: Next, +) -> Response { + let if_none_match = if_none_match.map(|th| th.0); + let resource_path = partial_uri.path().trim_start_matches('/'); + let Some(etag) = STATIC_ETAG_MAP.get(resource_path).map(|etag| { + etag.parse::() + .expect("compile time generated, should always pass") + }) else { + let res = next.run(req).await; + + debug_assert!( + !res.status().is_success(), + "no etag found for static resource at {}, but should exist.\n{:?}", + resource_path, + STATIC_ETAG_MAP, + ); + + return res; + }; + + if let Some(if_none_match) = if_none_match + && !if_none_match.precondition_passes(&etag) + { + return ( + StatusCode::NOT_MODIFIED, + TypedHeader(etag), + Extension(CachePolicy::ForeverInCdnAndBrowser), + ) + .into_response(); + } + + let mut res = next.run(req).await; + res.headers_mut().typed_insert(etag); + res +} + pub(crate) fn build_static_router() -> AxumRouter { AxumRouter::new() .route( @@ -80,25 +126,30 @@ pub(crate) fn build_static_router() -> AxumRouter { request_recorder(request, next, Some("static resource")).await })), ) + .layer(middleware::from_fn(conditional_get)) } #[cfg(test)] mod tests { - use super::{STYLE_CSS, VENDORED_CSS}; + use super::*; use crate::{ test::{AxumResponseTestExt, AxumRouterTestExt, async_wrapper}, - web::cache::CachePolicy, + web::headers::compute_etag, + }; + use axum::{Router, body::Body}; + use http::{ + HeaderMap, + header::{CONTENT_LENGTH, CONTENT_TYPE, ETAG}, }; - use axum::response::Response as AxumResponse; - use reqwest::StatusCode; use std::fs; use test_case::test_case; + use tower::ServiceExt as _; const STATIC_SEARCH_PATHS: &[&str] = &["static", "vendor"]; - fn content_length(resp: &AxumResponse) -> u64 { + fn content_length(resp: &Response) -> u64 { resp.headers() - .get("Content-Length") + .get(CONTENT_LENGTH) .expect("content-length header") .to_str() .unwrap() @@ -106,21 +157,73 @@ mod tests { .unwrap() } + fn etag(resp: &Response) -> ETag { + resp.headers().typed_get().unwrap() + } + + async fn test_conditional_get(web: &Router, path: &str) -> anyhow::Result<()> { + fn req(path: &str, f: impl FnOnce(&mut HeaderMap)) -> Request { + let mut builder = Request::builder().uri(path); + f(builder.headers_mut().unwrap()); + builder.body(Body::empty()).unwrap() + } + + // original request = 200 + let resp = web.clone().oneshot(req(path, |_| {})).await?; + + assert_eq!(resp.status(), StatusCode::OK); + let etag = etag(&resp); + + { + // if-none-match with correct etag + let if_none_match: IfNoneMatch = etag.into(); + + let cached_response = web + .clone() + .oneshot(req(path, |h| h.typed_insert(if_none_match))) + .await?; + + assert_eq!(cached_response.status(), StatusCode::NOT_MODIFIED); + } + + { + let other_if_none_match: IfNoneMatch = "\"some-other-etag\"" + .parse::() + .expect("valid etag") + .into(); + + let uncached_response = web + .clone() + .oneshot(req(path, |h| h.typed_insert(other_if_none_match))) + .await?; + + assert_eq!(uncached_response.status(), StatusCode::OK); + } + + Ok(()) + } + #[test] fn style_css() { async_wrapper(|env| async move { let web = env.web_app().await; - let resp = web.get("/-/static/style.css").await?; + const PATH: &str = "/-/static/style.css"; + let resp = web.get(PATH).await?; assert!(resp.status().is_success()); resp.assert_cache_control(CachePolicy::ForeverInCdnAndBrowser, env.config()); + let headers = resp.headers(); assert_eq!( - resp.headers().get("Content-Type"), + headers.get(CONTENT_TYPE), Some(&"text/css".parse().unwrap()), ); + assert_eq!(content_length(&resp), STYLE_CSS.len() as u64); + assert_eq!(etag(&resp), compute_etag(STYLE_CSS.as_bytes())); assert_eq!(resp.bytes().await?, STYLE_CSS.as_bytes()); + test_conditional_get(&web, PATH).await?; + Ok(()) }); } @@ -130,16 +233,22 @@ mod tests { async_wrapper(|env| async move { let web = env.web_app().await; - let resp = web.get("/-/static/vendored.css").await?; + const PATH: &str = "/-/static/vendored.css"; + + let resp = web.get(PATH).await?; assert!(resp.status().is_success(), "{}", resp.text().await?); + resp.assert_cache_control(CachePolicy::ForeverInCdnAndBrowser, env.config()); assert_eq!( - resp.headers().get("Content-Type"), + resp.headers().get(CONTENT_TYPE), Some(&"text/css".parse().unwrap()), ); assert_eq!(content_length(&resp), VENDORED_CSS.len() as u64); + assert_eq!(etag(&resp), compute_etag(VENDORED_CSS.as_bytes())); assert_eq!(resp.text().await?, VENDORED_CSS); + test_conditional_get(&web, PATH).await?; + Ok(()) }); } @@ -157,6 +266,7 @@ mod tests { // to an IO-error. let resp = web.get("/-/static/index.js/something").await?; assert_eq!(resp.status().as_u16(), StatusCode::NOT_FOUND); + assert!(resp.headers().get(ETAG).is_none()); Ok(()) }); @@ -174,12 +284,15 @@ mod tests { assert!(resp.status().is_success()); resp.assert_cache_control(CachePolicy::ForeverInCdnAndBrowser, env.config()); assert_eq!( - resp.headers().get("Content-Type"), + resp.headers().get(CONTENT_TYPE), Some(&"text/javascript".parse().unwrap()), ); assert!(content_length(&resp) > 10); + etag(&resp); // panics if etag missing or invalid assert!(resp.text().await?.contains(expected_content)); + test_conditional_get(&web, path).await?; + Ok(()) }); } @@ -203,11 +316,11 @@ mod tests { assert!(resp.status().is_success(), "failed to fetch {url:?}"); resp.assert_cache_control(CachePolicy::ForeverInCdnAndBrowser, env.config()); - assert_eq!( - resp.bytes().await?, - fs::read(path).unwrap(), - "failed to fetch {url:?}", - ); + let content = fs::read(path).unwrap(); + assert_eq!(etag(&resp), compute_etag(&content)); + assert_eq!(resp.bytes().await?, content, "failed to fetch {url:?}",); + + test_conditional_get(&web, &url).await?; } } @@ -221,6 +334,7 @@ mod tests { let response = env.web_app().await.get("/-/static/whoop-de-do.png").await?; response.assert_cache_control(CachePolicy::NoCaching, env.config()); assert_eq!(response.status(), StatusCode::NOT_FOUND); + assert!(response.headers().get(ETAG).is_none()); Ok(()) }); @@ -238,7 +352,7 @@ mod tests { let resp = web.get(&url).await?; assert_eq!( - resp.headers().get("Content-Type"), + resp.headers().get(CONTENT_TYPE), Some(&mime.parse().unwrap()), "{url:?} has an incorrect content type", );