diff --git a/src/web/cache.rs b/src/web/cache.rs index 4ccffb286..fc567ef78 100644 --- a/src/web/cache.rs +++ b/src/web/cache.rs @@ -18,6 +18,9 @@ use tracing::error; /// This enables us to use the fastly "soft purge" for everything. pub const SURROGATE_KEY_ALL: SurrogateKey = SurrogateKey::from_static("all"); +/// cache policy for static assets like rustdoc files or build assets. +pub const STATIC_ASSET_CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser; + #[derive(Debug, Clone, PartialEq)] pub struct ResponseCacheHeaders { pub cache_control: Option, diff --git a/src/web/file.rs b/src/web/file.rs index 915be15f6..b501992bf 100644 --- a/src/web/file.rs +++ b/src/web/file.rs @@ -18,6 +18,12 @@ use axum_extra::{ }; use std::time::SystemTime; use tokio_util::io::ReaderStream; +use tracing::warn; + +// https://docs.fastly.com/products/compute-resource-limits#default-limits +// https://www.fastly.com/documentation/guides/full-site-delivery/performance/failure-modes-with-large-objects/ +// https://www.fastly.com/documentation/guides/full-site-delivery/caching/segmented-caching/ +const FASTLY_CACHE_MAX_OBJECT_SIZE: usize = 100 * 1024 * 1024; // 100 MiB #[derive(Debug)] pub(crate) struct File(pub(crate) Blob); @@ -41,9 +47,13 @@ impl File { #[cfg(test)] impl File { - pub fn into_response(self, if_none_match: Option<&IfNoneMatch>) -> AxumResponse { + pub fn into_response( + self, + if_none_match: Option<&IfNoneMatch>, + cache_policy: CachePolicy, + ) -> AxumResponse { let streaming_blob: StreamingBlob = self.0.into(); - StreamingFile(streaming_blob).into_response(if_none_match) + StreamingFile(streaming_blob).into_response(if_none_match, cache_policy) } } @@ -56,8 +66,40 @@ impl StreamingFile { Ok(StreamingFile(storage.get_stream(path).await?)) } - pub fn into_response(self, if_none_match: Option<&IfNoneMatch>) -> AxumResponse { - const CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser; + pub fn into_response( + self, + if_none_match: 
Option<&IfNoneMatch>, + mut cache_policy: CachePolicy, + ) -> AxumResponse { + // by default Fastly can only cache objects up to 100 MiB. + // Since we're streaming the response via chunked encoding, fastly itself doesn't know + // the object size until the streamed data size is > 100 MiB. In this case fastly just + // cuts the connection. + // To avoid issues with caching large files, we disable CDN caching for files that are too + // big. + // + // See: + // https://docs.fastly.com/products/compute-resource-limits#default-limits + // https://www.fastly.com/documentation/guides/full-site-delivery/performance/failure-modes-with-large-objects/ + // https://www.fastly.com/documentation/guides/full-site-delivery/caching/segmented-caching/ + // + // For now I use the `NoStoreMustRevalidate` policy, the important cache-control statement + // is only the `no-store` part. + // + // Future optimization could be: + // * only forbid fastly to store, and browsers still could. + // * implement segmented caching for large files somehow. 
+ if self.0.content_length > FASTLY_CACHE_MAX_OBJECT_SIZE + && !matches!(cache_policy, CachePolicy::NoStoreMustRevalidate) + { + warn!( + storage_path = self.0.path, + content_length = self.0.content_length, + "Disabling CDN caching for large file" + ); + cache_policy = CachePolicy::NoStoreMustRevalidate; + } + let last_modified = LastModified::from(SystemTime::from(self.0.date_updated)); if let Some(if_none_match) = if_none_match @@ -69,7 +111,7 @@ impl StreamingFile { // it's generally recommended to repeat caching headers on 304 responses TypedHeader(etag.clone()), TypedHeader(last_modified), - Extension(CACHE_POLICY), + Extension(cache_policy), ) .into_response() } else { @@ -81,7 +123,7 @@ impl StreamingFile { TypedHeader(ContentType::from(self.0.mime)), TypedHeader(last_modified), self.0.etag.map(TypedHeader), - Extension(CACHE_POLICY), + Extension(cache_policy), Body::from_stream(stream), ) .into_response() @@ -92,12 +134,18 @@ impl StreamingFile { #[cfg(test)] mod tests { use super::*; - use crate::{storage::CompressionAlgorithm, test::TestEnvironment, web::headers::compute_etag}; + use crate::{ + storage::CompressionAlgorithm, + test::TestEnvironment, + web::{cache::STATIC_ASSET_CACHE_POLICY, headers::compute_etag}, + }; use axum_extra::headers::{ETag, HeaderMapExt as _}; use chrono::Utc; use http::header::{CACHE_CONTROL, ETAG, LAST_MODIFIED}; use std::{io, rc::Rc}; + const CONTENT: &[u8] = b"Hello, world!"; + fn streaming_blob( content: impl Into>, alg: Option, @@ -114,13 +162,28 @@ mod tests { } } + #[test] + fn test_big_file_stream_drops_cache_policy() { + let mut stream = streaming_blob(CONTENT, None); + stream.content_length = FASTLY_CACHE_MAX_OBJECT_SIZE + 1; + + let response = + StreamingFile(stream).into_response(None, CachePolicy::ForeverInCdnAndBrowser); + // even though we passed a cache policy in `into_response`, it should be overridden to + // `NoStoreMustRevalidate` due to the large size of the file. 
+ let cache = response + .extensions() + .get::() + .expect("missing cache response extension"); + assert!(matches!(cache, CachePolicy::NoStoreMustRevalidate)); + } + #[tokio::test] async fn test_stream_into_response() -> Result<()> { - const CONTENT: &[u8] = b"Hello, world!"; let etag: ETag = { // first request normal let stream = StreamingFile(streaming_blob(CONTENT, None)); - let resp = stream.into_response(None); + let resp = stream.into_response(None, STATIC_ASSET_CACHE_POLICY); assert!(resp.status().is_success()); assert!(resp.headers().get(CACHE_CONTROL).is_none()); let cache = resp @@ -138,7 +201,7 @@ mod tests { { // cached request let stream = StreamingFile(streaming_blob(CONTENT, None)); - let resp = stream.into_response(Some(&if_none_match)); + let resp = stream.into_response(Some(&if_none_match), STATIC_ASSET_CACHE_POLICY); assert_eq!(resp.status(), StatusCode::NOT_MODIFIED); // cache related headers are repeated on the not-modified response @@ -172,7 +235,7 @@ mod tests { file.0.date_updated = now; - let resp = file.into_response(None); + let resp = file.into_response(None, STATIC_ASSET_CACHE_POLICY); assert!(resp.status().is_success()); assert!(resp.headers().get(CACHE_CONTROL).is_none()); let cache = resp diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index ffdce20b0..9fd2179ab 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -11,7 +11,7 @@ use crate::{ utils::{self, Dependency}, web::{ MetaData, ReqVersion, axum_cached_redirect, - cache::CachePolicy, + cache::{CachePolicy, STATIC_ASSET_CACHE_POLICY}, crate_details::CrateDetails, csp::Csp, error::{AxumNope, AxumResult}, @@ -228,7 +228,7 @@ async fn try_serve_legacy_toolchain_asset( // which is reached via the new handler. Ok(StreamingFile::from_path(&storage, &path) .await? - .into_response(if_none_match)) + .into_response(if_none_match, STATIC_ASSET_CACHE_POLICY)) } /// Handler called for `/:crate` and `/:crate/:version` URLs. 
Automatically redirects to the docs @@ -344,7 +344,8 @@ pub(crate) async fn rustdoc_redirector_handler( ) .await { - Ok(blob) => Ok(StreamingFile(blob).into_response(if_none_match.as_deref())), + Ok(blob) => Ok(StreamingFile(blob) + .into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY)), Err(err) => { if !matches!(err.downcast_ref(), Some(AxumNope::ResourceNotFound)) && !matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) @@ -763,7 +764,9 @@ pub(crate) async fn rustdoc_html_server_handler( // default asset caching behaviour is `Cache::ForeverInCdnAndBrowser`. // This is an edge-case when we serve invocation specific static assets under `/latest/`: // https://github.com/rust-lang/docs.rs/issues/1593 - return Ok(StreamingFile(blob).into_response(if_none_match.as_deref())); + return Ok( + StreamingFile(blob).into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY) + ); } let latest_release = krate.latest_release()?; @@ -982,9 +985,11 @@ pub(crate) async fn json_download_handler( Some(wanted_compression), ); + let cache_policy = CachePolicy::ForeverInCdn(krate.name.clone().into()); + let (mut response, updated_storage_path) = match storage.get_raw_stream(&storage_path).await { Ok(file) => ( - StreamingFile(file).into_response(if_none_match.as_deref()), + StreamingFile(file).into_response(if_none_match.as_deref(), cache_policy), None, ), Err(err) if matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) => { @@ -1003,7 +1008,7 @@ pub(crate) async fn json_download_handler( // redirect to that as fallback ( StreamingFile(storage.get_raw_stream(&storage_path).await?) 
- .into_response(if_none_match.as_deref()), + .into_response(if_none_match.as_deref(), cache_policy), Some(storage_path), ) } else { @@ -1013,13 +1018,6 @@ pub(crate) async fn json_download_handler( Err(err) => return Err(err.into()), }; - // StreamingFile::into_response automatically set the default cache-policy for - // static assets (ForeverInCdnAndBrowser). - // Here we override it with the standard policy for build output. - response - .extensions_mut() - .insert(CachePolicy::ForeverInCdn(krate.name.clone().into())); - // set content-disposition to attachment to trigger download in browsers // For the attachment filename we can use just the filename without the path, // since that already contains all the info. @@ -1059,15 +1057,10 @@ pub(crate) async fn download_handler( let version = &matched_release.release.version; let archive_path = rustdoc_archive_path(params.name(), version); - let mut response = StreamingFile(storage.get_raw_stream(&archive_path).await?) - .into_response(if_none_match.as_deref()); - - // StreamingFile::into_response automatically set the default cache-policy for - // static assets (ForeverInCdnAndBrowser). - // Here we override it with the standard policy for build output. - response - .extensions_mut() - .insert(CachePolicy::ForeverInCdn(matched_release.name.into())); + let mut response = StreamingFile(storage.get_raw_stream(&archive_path).await?).into_response( + if_none_match.as_deref(), + CachePolicy::ForeverInCdn(matched_release.name.into()), + ); // set content-disposition to attachment to trigger download in browsers response.headers_mut().insert( @@ -1092,7 +1085,7 @@ pub(crate) async fn static_asset_handler( Ok(StreamingFile::from_path(&storage, &storage_path) .await? 
- .into_response(if_none_match.as_deref())) + .into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY)) } #[cfg(test)] diff --git a/src/web/source.rs b/src/web/source.rs index c21d06f90..21fb7a959 100644 --- a/src/web/source.rs +++ b/src/web/source.rs @@ -5,15 +5,14 @@ use crate::{ storage::PathNotFoundError, web::{ MetaData, ReqVersion, - cache::CachePolicy, + cache::{CachePolicy, STATIC_ASSET_CACHE_POLICY}, error::{AxumNope, AxumResult}, extractors::{ DbConnection, rustdoc::{PageKind, RustdocParams}, }, file::StreamingFile, - headers::CanonicalUrl, - headers::IfNoneMatch, + headers::{CanonicalUrl, IfNoneMatch}, match_version, page::templates::{RenderBrands, RenderRegular, RenderSolid, filters}, }, @@ -285,7 +284,8 @@ pub(crate) async fn source_browser_handler( let is_text = stream.mime.type_() == mime::TEXT || stream.mime == mime::APPLICATION_JSON; if !is_text { // if the file isn't text, serve it directly to the client - let mut response = StreamingFile(stream).into_response(if_none_match.as_deref()); + let mut response = StreamingFile(stream) + .into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY); response.headers_mut().typed_insert(canonical_url); response .extensions_mut() diff --git a/src/web/statics.rs b/src/web/statics.rs index 9acc706ac..e98874c9a 100644 --- a/src/web/statics.rs +++ b/src/web/statics.rs @@ -1,7 +1,7 @@ use super::{ cache::CachePolicy, headers::IfNoneMatch, metrics::request_recorder, routes::get_static, }; -use crate::db::mimes::APPLICATION_OPENSEARCH_XML; +use crate::{db::mimes::APPLICATION_OPENSEARCH_XML, web::cache::STATIC_ASSET_CACHE_POLICY}; use axum::{ Router as AxumRouter, extract::{Extension, Request}, @@ -24,13 +24,11 @@ const RUSTDOC_2021_12_05_CSS: &str = const RUSTDOC_2025_08_20_CSS: &str = include_str!(concat!(env!("OUT_DIR"), "/rustdoc-2025-08-20.css")); -const STATIC_CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser; - include!(concat!(env!("OUT_DIR"), "/static_etag_map.rs")); fn 
build_static_css_response(content: &'static str) -> impl IntoResponse { ( - Extension(STATIC_CACHE_POLICY), + Extension(STATIC_ASSET_CACHE_POLICY), TypedHeader(ContentType::from(mime::TEXT_CSS)), content, ) @@ -43,7 +41,7 @@ async fn set_needed_static_headers(req: Request, next: Next) -> Response { let mut response = next.run(req).await; if response.status().is_success() { - response.extensions_mut().insert(STATIC_CACHE_POLICY); + response.extensions_mut().insert(STATIC_ASSET_CACHE_POLICY); } if is_opensearch_xml {