Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/db/mimes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ macro_rules! mime {
mime!(APPLICATION_ZIP, "application/zip");
mime!(APPLICATION_ZSTD, "application/zstd");
mime!(APPLICATION_GZIP, "application/gzip");
mime!(APPLICATION_XML, "application/xml");
mime!(TEXT_MARKDOWN, "text/markdown");
mime!(TEXT_RUST, "text/rust");
mime!(TEXT_TOML, "text/toml");
144 changes: 99 additions & 45 deletions src/web/sitemap.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
use crate::{
Config,
db::mimes,
docbuilder::Limits,
impl_axum_webpage,
utils::{ConfigName, get_config},
utils::{ConfigName, get_config, report_error},
web::{
AxumErrorPage,
error::{AxumNope, AxumResult},
extractors::{DbConnection, Path},
page::templates::{RenderBrands, RenderSolid, filters},
},
};
use anyhow::Context as _;
use askama::Template;
use axum::{extract::Extension, http::StatusCode, response::IntoResponse};
use async_stream::stream;
use axum::{
body::{Body, Bytes},
extract::Extension,
http::StatusCode,
response::IntoResponse,
};
use axum_extra::{TypedHeader, headers::ContentType};
use chrono::{TimeZone, Utc};
use futures_util::stream::TryStreamExt;
use futures_util::{StreamExt as _, pin_mut};
use std::sync::Arc;
use tracing::{Span, error};
use tracing_futures::Instrument as _;

/// sitemap index
#[derive(Template)]
#[template(path = "core/sitemapindex.xml")]
#[template(path = "core/sitemap/index.xml")]
#[derive(Debug, Clone, PartialEq, Eq)]
struct SitemapIndexXml {
sitemaps: Vec<char>,
Expand All @@ -35,25 +46,19 @@ pub(crate) async fn sitemapindex_handler() -> impl IntoResponse {
SitemapIndexXml { sitemaps }
}

/// One crate/target row of a sitemap, rendered through the
/// `core/sitemap/_item.xml` template.
#[derive(Template)]
#[template(path = "core/sitemap/_item.xml")]
#[derive(Debug, Clone, PartialEq, Eq)]
struct SitemapRow {
struct SitemapItemXml {
crate_name: String,
// Pre-formatted timestamp string; the template emits it inside `<lastmod>`.
last_modified: String,
target_name: String,
}

/// The sitemap
#[derive(Template)]
#[template(path = "core/sitemap.xml")]
#[derive(Debug, Clone, PartialEq, Eq)]
struct SitemapXml {
releases: Vec<SitemapRow>,
}
/// XML prolog and opening `<urlset>` tag, sent as the first chunk of the
/// streamed sitemap.
///
/// Raw (byte) string literals do not process escape sequences, so the trailing
/// newline must be a real line break inside the literal. Writing `\n` here
/// would emit a literal backslash followed by `n` as text inside `<urlset>`.
const SITEMAP_HEADER: &[u8] = br#"<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"#;

impl_axum_webpage! {
SitemapXml,
content_type = "application/xml",
}
/// Closing `</urlset>` tag plus a trailing newline, sent as the final chunk
/// of the streamed sitemap.
const SITEMAP_FOOTER: &[u8] = "</urlset>\n".as_bytes();

pub(crate) async fn sitemap_handler(
Path(letter): Path<String>,
Expand All @@ -67,37 +72,86 @@ pub(crate) async fn sitemap_handler(
return Err(AxumNope::ResourceNotFound);
}

let releases: Vec<_> = sqlx::query!(
r#"SELECT crates.name,
releases.target_name,
MAX(releases.release_time) as "release_time!"
FROM crates
INNER JOIN releases ON releases.crate_id = crates.id
WHERE
rustdoc_status = true AND
crates.name ILIKE $1
GROUP BY crates.name, releases.target_name
"#,
format!("{letter}%"),
)
.fetch(&mut *conn)
.map_ok(|row| SitemapRow {
crate_name: row.name,
target_name: row
.target_name
.expect("when we have rustdoc_status=true, this field is filled"),
last_modified: row
.release_time
// On Aug 27 2022 we added `<link rel="canonical">` to all pages,
// so they should all get recrawled if they haven't been since then.
.max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap())
.format("%+")
.to_string(),
let stream_span = Span::current();

let stream = stream!({
let mut items: usize = 0;
let mut streamed_bytes: usize = SITEMAP_HEADER.len();

yield Ok(Bytes::from_static(SITEMAP_HEADER));

let result = sqlx::query!(
r#"SELECT crates.name,
releases.target_name,
MAX(releases.release_time) as "release_time!"
FROM crates
INNER JOIN releases ON releases.crate_id = crates.id
WHERE
rustdoc_status = true AND
crates.name ILIKE $1
GROUP BY crates.name, releases.target_name
"#,
format!("{letter}%"),
)
.fetch(&mut *conn);

pin_mut!(result);
while let Some(row) = result.next().await {
let row = match row.context("error fetching row from database") {
Ok(row) => row,
Err(err) => {
report_error(&err);
yield Err(AxumNope::InternalError(err));
break;
}
};

match (SitemapItemXml {
crate_name: row.name,
target_name: row
.target_name
.expect("when we have rustdoc_status=true, this field is filled"),
last_modified: row
.release_time
// On Aug 27 2022 we added `<link rel="canonical">` to all pages,
// so they should all get recrawled if they haven't been since then.
.max(Utc.with_ymd_and_hms(2022, 8, 28, 0, 0, 0).unwrap())
.format("%+")
.to_string(),
}
.render()
.context("error when rendering sitemap item xml"))
{
Ok(item) => {
let bytes = Bytes::from(item);
items += 1;
streamed_bytes += bytes.len();
yield Ok(bytes);
}
Err(err) => {
report_error(&err);
yield Err(AxumNope::InternalError(err));
break;
}
};
}

streamed_bytes += SITEMAP_FOOTER.len();
yield Ok(Bytes::from_static(SITEMAP_FOOTER));

if items > 50_000 || streamed_bytes > 50 * 1024 * 1024 {
// alert when sitemap limits are reached
// https://developers.google.com/search/docs/crawling-indexing/sitemaps/build-sitemap#general-guidelines
error!(items, streamed_bytes, letter, "sitemap limits exceeded")
}
})
.try_collect()
.await?;
.instrument(stream_span);

Ok(SitemapXml { releases })
Ok((
StatusCode::OK,
TypedHeader(ContentType::from(mimes::APPLICATION_XML.clone())),
Body::from_stream(stream),
))
}

#[derive(Template)]
Expand Down
15 changes: 0 additions & 15 deletions templates/core/sitemap.xml

This file was deleted.

10 changes: 10 additions & 0 deletions templates/core/sitemap/_item.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<url>
<loc>https://docs.rs/{{ crate_name }}/latest/{{ target_name }}/</loc>
<lastmod>{{ last_modified|escape_xml }}</lastmod>
<priority>1.0</priority>
</url>
<url>
<loc>https://docs.rs/{{ crate_name }}/latest/{{ target_name }}/all.html</loc>
<lastmod>{{ last_modified|escape_xml }}</lastmod>
<priority>0.8</priority>
</url>
File renamed without changes.
Loading