This commit is contained in:
_ 2025-08-14 03:25:39 +00:00
parent d4d0adaacc
commit dfbf23ed6a
3 changed files with 281 additions and 268 deletions

View file

@ -1,18 +1,16 @@
use anyhow::{Context as _, Result, anyhow, bail}; use anyhow::{Context as _, Result, bail};
use base64::Engine as _;
use camino::Utf8PathBuf; use camino::Utf8PathBuf;
use chrono::{DateTime, TimeZone as _, Utc}; use chrono::{DateTime, Utc};
use clap::Parser as _; use clap::Parser as _;
use icalendar::{Component as _, EventLike as _};
use serde::Deserialize; use serde::Deserialize;
use std::{ use std::{collections::BTreeSet, io::Write as _, time::Duration};
collections::BTreeSet, io::Write as _, path::PathBuf, str::FromStr as _, time::Duration,
};
use url::Url; use url::Url;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
mod wac_ical;
#[derive(Clone, Default, Deserialize)] #[derive(Clone, Default, Deserialize)]
struct Downloadable { struct Downloadable {
/// URL to scrape to download the JSON /// URL to scrape to download the JSON
@ -43,19 +41,6 @@ struct ConfigCampfire {
ui: CalendarUi, ui: CalendarUi,
} }
/// Google Calendar has a public ics endpoint that we scrape for all upstream Google Calendars
#[derive(Clone, Default, Deserialize)]
struct ConfigIcal {
#[serde(flatten)]
dl: Downloadable,
/// Magical ID we pass to Google to deep-link to Google Calendar events
google_id: Option<String>,
#[serde(flatten)]
ui: CalendarUi,
}
#[derive(Deserialize)] #[derive(Deserialize)]
struct ConfigOutput { struct ConfigOutput {
/// Used as the OpenGraph description in meta tags /// Used as the OpenGraph description in meta tags
@ -78,20 +63,20 @@ struct ConfigOutput {
#[derive(Deserialize)] #[derive(Deserialize)]
struct Config { struct Config {
campfires: Vec<ConfigCampfire>, campfires: Vec<ConfigCampfire>,
icals: Vec<ConfigIcal>, icals: Vec<wac_ical::Config>,
output: ConfigOutput, output: ConfigOutput,
} }
#[derive(clap::Parser)] #[derive(clap::Parser)]
struct CliAuto { struct CliAuto {
#[arg(long)] #[arg(long)]
config: PathBuf, config: Utf8PathBuf,
} }
#[derive(clap::Parser)] #[derive(clap::Parser)]
struct CliIcsDebug { struct CliIcsDebug {
#[arg(long)] #[arg(long)]
config: PathBuf, config: Utf8PathBuf,
} }
#[derive(clap::Subcommand)] #[derive(clap::Subcommand)]
@ -164,81 +149,10 @@ impl DatePerhapsTime {
} }
} }
fn normalize_date_perhaps_time(
x: &icalendar::DatePerhapsTime,
tz: chrono_tz::Tz,
) -> Result<DatePerhapsTime> {
Ok(match x {
icalendar::DatePerhapsTime::DateTime(x) => {
let dt = x
.try_into_utc()
.context("Data error - Could not convert event datetime to UTC")?
.with_timezone(&tz);
DatePerhapsTime { dt, all_day: false }
}
icalendar::DatePerhapsTime::Date(date) => {
let midnight = chrono::NaiveTime::default();
let dt = tz.from_local_datetime(&date.and_time(midnight)).single().context("DateTime doesn't map to a single unambiguous datetime when converting to our timezone")?;
DatePerhapsTime { dt, all_day: true }
}
})
}
fn recurring_dates_opt(
params: &Parameters,
ev: &icalendar::Event,
rrule: &icalendar::Property,
) -> Result<Option<impl Iterator<Item = DatePerhapsTime>>> {
let dtstart = ev
.get_start()
.context("Data error - Event has no DTSTART")?;
let all_day = match &dtstart {
icalendar::DatePerhapsTime::Date(_) => true,
icalendar::DatePerhapsTime::DateTime(_) => false,
};
let dtstart_norm = normalize_date_perhaps_time(&dtstart, params.tz)?;
let rr = rrule::RRule::from_str(rrule.value())
.with_context(|| format!("RRule parse failed `{}`", rrule.value()))?;
if let Some(until) = rr.get_until()
&& *until < params.output_start
{
// This skips over some bad data in our test set where we fail to parse a recurring event that's already ended before our output window starts
return Ok(None);
}
let rrule_tz = params.tz.into();
let rr = rr.build(dtstart_norm.dt.with_timezone(&rrule_tz))?;
let dates = rr
.after(params.output_start.with_timezone(&rrule_tz))
.before(params.output_stop.with_timezone(&rrule_tz))
.all(10)
.dates
.into_iter()
.map(move |dtstart| DatePerhapsTime {
dt: dtstart.with_timezone(&params.tz),
all_day,
});
Ok(Some(dates))
}
fn recurring_dates(
params: &Parameters,
ev: &icalendar::Event,
rrule: &icalendar::Property,
) -> Result<impl Iterator<Item = DatePerhapsTime>> {
Ok(recurring_dates_opt(params, ev, rrule)?
.into_iter()
.flatten())
}
/// An event that's been duplicated according to its recurrence rules, so we can sort by datetimes /// An event that's been duplicated according to its recurrence rules, so we can sort by datetimes
struct EventInstance { struct EventInstance {
calendar_ui: CalendarUi, calendar_ui: CalendarUi,
dtstart: DatePerhapsTime, dtstart: DatePerhapsTime,
// ev: &'a icalendar::Event,
location: Option<String>, location: Option<String>,
recurrence_id: Option<icalendar::DatePerhapsTime>, recurrence_id: Option<icalendar::DatePerhapsTime>,
summary: Option<String>, summary: Option<String>,
@ -246,92 +160,6 @@ struct EventInstance {
url: Option<String>, url: Option<String>,
} }
fn google_url(
dtstart: DatePerhapsTime,
has_rrule: bool,
uid: Option<&str>,
google_id: &str,
) -> Result<Option<String>> {
let uid = uid.context("No UID")?;
if uid.len() > 100 {
// There's one event in one of our test Google calendars which originates from Microsoft Exchange and has a totally different UID format from any other event. I was not able to reverse it, so I'm skipping it for now.
return Ok(None);
}
// Strip off the back part of the Google UID
let idx = uid.find(['@', '_']).unwrap_or(uid.len());
let uid_2 = &uid[..idx];
let utc_dtstart = dtstart
.dt
.with_timezone(&chrono_tz::UTC)
.format("%Y%m%dT%H%M%SZ")
.to_string();
let eid_plain = if has_rrule {
// Recurring events have an extra timestamp in their base64 to disambiguiate
format!("{uid_2}_{utc_dtstart} {google_id}")
} else {
format!("{uid_2} {google_id}")
};
let eid = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&eid_plain);
let mut link = url::Url::parse("https://www.google.com/calendar/event").unwrap();
link.query_pairs_mut().append_pair("eid", &eid);
Ok(Some(link.to_string()))
}
fn ical_event_instances(
config_ical: &ConfigIcal,
params: &Parameters,
ev: &icalendar::Event,
) -> Result<Vec<EventInstance>> {
let dates = if let Some(rrule) = ev.properties().get("RRULE") {
recurring_dates(params, ev, rrule)?.collect()
} else {
// Event that occurs once
let dtstart = ev.get_start().context("Data error - Event has no start")?;
let dtstart_normalized = normalize_date_perhaps_time(&dtstart, params.tz)?;
if dtstart_normalized.dt < params.output_start || dtstart_normalized.dt > params.output_stop
{
return Ok(vec![]);
}
vec![dtstart_normalized]
};
let instances = dates
.into_iter()
.map(|dtstart| {
let has_rrule = ev.properties().get("RRULE").is_some();
let uid = ev.get_uid().map(|s| s.to_string());
let url = if let Some(url) = ev.get_url() {
Some(url.to_string())
} else if let Some(google_id) = &config_ical.google_id {
google_url(dtstart, has_rrule, uid.as_deref(), google_id)?
} else {
None
};
Ok::<_, anyhow::Error>(EventInstance {
calendar_ui: config_ical.ui.clone(),
dtstart,
location: ev.get_location().map(|s| s.to_string()),
recurrence_id: ev.get_recurrence_id(),
summary: ev.get_summary().map(|s| s.to_string()),
uid,
url,
})
})
.collect();
instances
}
struct ICal {
/// The parsed ics file
cal: icalendar::Calendar,
/// The config used to load this calendar
config: ConfigIcal,
}
/// Used to link recurrence exceptions to the original events they replace /// Used to link recurrence exceptions to the original events they replace
#[derive(Eq, Ord, PartialOrd, PartialEq)] #[derive(Eq, Ord, PartialOrd, PartialEq)]
struct RecurrenceKey<'a> { struct RecurrenceKey<'a> {
@ -339,98 +167,15 @@ struct RecurrenceKey<'a> {
uid: &'a str, uid: &'a str,
} }
impl ICal {
fn read_from_str(config: ConfigIcal, s: &str) -> Result<Self> {
let cal = s.parse().map_err(|s| anyhow!("parse error {s}"))?;
let cal = Self { cal, config };
Ok(cal)
}
fn read_from_downloadable(config: ConfigIcal) -> Result<Self> {
let s = std::fs::read_to_string(&config.dl.file_path)?;
Self::read_from_str(config, &s)
}
fn events(&self) -> impl Iterator<Item = &icalendar::Event> {
self.cal.components.iter().filter_map(|comp| {
if let icalendar::CalendarComponent::Event(ev) = comp {
Some(ev)
} else {
None
}
})
}
/// Returns an unsorted list of event instances for this calendar
fn event_instances(&self, params: &Parameters) -> Result<Vec<EventInstance>> {
let mut instances = vec![];
let mut recurrence_exceptions = BTreeSet::new();
for ev in self.events() {
let eis = match ical_event_instances(&self.config, params, ev)
.with_context(|| format!("Failed to process event with UID '{:?}'", ev.get_uid()))
{
Ok(x) => x,
Err(e) => {
if ev.get_last_modified().context("Event has no timestamp")?
< params.ignore_before
{
tracing::warn!("Ignoring error from very old event {e:?}");
continue;
} else {
Err(e)?
}
}
};
for ei in eis {
instances.push(ei);
}
if let Some(recurrence_id) = ev.get_recurrence_id() {
// This is a recurrence exception and we must handle it specially by later deleting the original event it replaces
let recurrence_id = normalize_date_perhaps_time(&recurrence_id, params.tz)
.context("We should be able to normalize recurrence IDs")?;
let uid = ev
.get_uid()
.context("Every recurrence exception should have a UID")?;
recurrence_exceptions.insert(RecurrenceKey { recurrence_id, uid });
}
}
// Find all recurring events that are replaced with recurrence exceptions and delete the originals.
// There is probably a not-linear-time way to do this, but this should be fine.
instances.retain(|ev| {
if ev.recurrence_id.is_some() {
// This is a recurrence exception, exceptions never delete themselves
return true;
}
let Some(uid) = &ev.uid else {
// If there's no UID, we can't apply recurrence exceptions
return true;
};
let key = RecurrenceKey {
recurrence_id: ev.dtstart,
uid,
};
!recurrence_exceptions.contains(&key)
});
Ok(instances)
}
}
#[derive(Default)] #[derive(Default)]
struct Data { struct Data {
icals: Vec<ICal>, icals: Vec<wac_ical::Calendar>,
} }
fn read_data_from_disk(config: &Config) -> Result<Data> { fn read_data_from_disk(config: &Config) -> Result<Data> {
let mut data = Data::default(); let mut data = Data::default();
for config_ical in &config.icals { for config_ical in &config.icals {
let cal = ICal::read_from_downloadable(config_ical.clone())?; let cal = wac_ical::Calendar::read_from_downloadable(config_ical.clone())?;
data.icals.push(cal); data.icals.push(cal);
} }

View file

@ -1,4 +1,5 @@
use super::*; use super::*;
use chrono::TimeZone as _;
fn chicago_time( fn chicago_time(
year: i32, year: i32,
@ -22,6 +23,8 @@ fn dt_from_ts(ts: i64) -> DateTime<chrono_tz::Tz> {
/// Expect that parsing a calendar works /// Expect that parsing a calendar works
#[test] #[test]
fn calendar_from_str() -> Result<()> { fn calendar_from_str() -> Result<()> {
use wac_ical::{Calendar, Config};
// Blank lines added for clarity // Blank lines added for clarity
let s = r#" let s = r#"
BEGIN:VCALENDAR BEGIN:VCALENDAR
@ -64,7 +67,7 @@ END:VEVENT
END:VCALENDAR END:VCALENDAR
"#; "#;
let ical = ICal::read_from_str(ConfigIcal::default(), s)?; let ical = Calendar::read_from_str(Config::default(), s)?;
let now = dt_from_ts(1755000000); let now = dt_from_ts(1755000000);
let params = Parameters::new(now)?; let params = Parameters::new(now)?;
let instances = ical.event_instances(&params)?; let instances = ical.event_instances(&params)?;
@ -82,6 +85,8 @@ END:VCALENDAR
#[test] #[test]
fn hand_written() -> Result<()> { fn hand_written() -> Result<()> {
use wac_ical::{Calendar, Config};
let s = r#" let s = r#"
BEGIN:VCALENDAR BEGIN:VCALENDAR
@ -102,7 +107,7 @@ END:VEVENT
END:VCALENDAR END:VCALENDAR
"#; "#;
let ical = ICal::read_from_str(ConfigIcal::default(), s)?; let ical = Calendar::read_from_str(Config::default(), s)?;
let params = Parameters { let params = Parameters {
ignore_before: chicago_time(2025, 1, 1, 0, 0, 0), ignore_before: chicago_time(2025, 1, 1, 0, 0, 0),
output_start: chicago_time(2025, 7, 1, 0, 0, 0), output_start: chicago_time(2025, 7, 1, 0, 0, 0),
@ -132,6 +137,8 @@ END:VCALENDAR
/// Expect that recurrent exceptions work correctly and don't duplicate events /// Expect that recurrent exceptions work correctly and don't duplicate events
#[test] #[test]
fn recurrence_exceptions() -> Result<()> { fn recurrence_exceptions() -> Result<()> {
use wac_ical::{Calendar, Config};
let s = r#" let s = r#"
BEGIN:VCALENDAR BEGIN:VCALENDAR
@ -188,7 +195,7 @@ END:VEVENT
END:VCALENDAR END:VCALENDAR
"#; "#;
let ical = ICal::read_from_str(ConfigIcal::default(), s)?; let ical = Calendar::read_from_str(Config::default(), s)?;
let params = Parameters { let params = Parameters {
ignore_before: chicago_time(2025, 1, 1, 0, 0, 0), ignore_before: chicago_time(2025, 1, 1, 0, 0, 0),
output_start: chicago_time(2025, 7, 1, 0, 0, 0), output_start: chicago_time(2025, 7, 1, 0, 0, 0),

261
src/wac_ical.rs Normal file
View file

@ -0,0 +1,261 @@
//! Structs and functions specific to gathering input from ics files, which is a popular format that Google Calendar happens to put out
use super::{CalendarUi, DatePerhapsTime, Downloadable, EventInstance, Parameters, RecurrenceKey};
use anyhow::{Context as _, Result, anyhow};
use base64::Engine as _;
use chrono::TimeZone as _;
use icalendar::{Component as _, EventLike as _};
use serde::Deserialize;
use std::{collections::BTreeSet, str::FromStr as _};
/// Google Calendar has a public ics endpoint that we scrape for all upstream Google Calendars
///
/// One of these describes a single upstream ics calendar. It is deserialized with serde and
/// handed to `Calendar::read_from_str` / `Calendar::read_from_downloadable`.
#[derive(Clone, Default, Deserialize)]
pub(crate) struct Config {
/// Where the ics data lives. `Calendar::read_from_downloadable` reads `dl.file_path` from disk.
/// Flattened, so its fields are read from the same level as this struct's own fields.
#[serde(flatten)]
pub(crate) dl: Downloadable,
/// Magical ID we pass to Google to deep-link to Google Calendar events
///
/// Only consulted for events that carry no URL of their own. When this is `None` such events
/// end up with no link at all.
google_id: Option<String>,
/// Presentation settings, cloned into every `EventInstance` produced from this calendar.
/// Flattened, so its fields are read from the same level as this struct's own fields.
#[serde(flatten)]
pub(crate) ui: CalendarUi,
}
/// A parsed ics calendar together with the configuration it was loaded with.
///
/// Build one with `Calendar::read_from_str` or `Calendar::read_from_downloadable`, then call
/// `Calendar::event_instances` to expand it into individual event occurrences.
pub(crate) struct Calendar {
/// The parsed ics file
cal: icalendar::Calendar,
/// The config used to load this calendar
config: Config,
}
/// Converts an ics date-or-datetime into our own `DatePerhapsTime`, expressed in `tz`.
///
/// * A date-only value is treated as an all-day marker: it becomes local midnight of that day
///   in `tz`, with `all_day` set.
/// * A full datetime is first converted to UTC and then re-expressed in `tz`, with `all_day`
///   cleared.
///
/// # Errors
/// Fails if a datetime can't be converted to UTC, or if local midnight of a date-only value
/// does not correspond to exactly one instant in `tz`.
fn normalize_date_perhaps_time(
    x: &icalendar::DatePerhapsTime,
    tz: chrono_tz::Tz,
) -> Result<DatePerhapsTime> {
    match x {
        icalendar::DatePerhapsTime::Date(day) => {
            // `NaiveTime::default()` is 00:00:00
            let local_midnight = day.and_time(chrono::NaiveTime::default());
            let dt = tz
                .from_local_datetime(&local_midnight)
                .single()
                .context("DateTime doesn't map to a single unambiguous datetime when converting to our timezone")?;
            Ok(DatePerhapsTime { dt, all_day: true })
        }
        icalendar::DatePerhapsTime::DateTime(stamp) => {
            let utc = stamp
                .try_into_utc()
                .context("Data error - Could not convert event datetime to UTC")?;
            Ok(DatePerhapsTime {
                dt: utc.with_timezone(&tz),
                all_day: false,
            })
        }
    }
}
/// Expands a recurring event's `RRULE` into the start times that fall inside the output window.
///
/// Returns `Ok(None)` when the rule has an `UNTIL` that lies before `params.output_start`, in
/// which case the rule is never built. Otherwise returns an iterator over the occurrences
/// between `params.output_start` and `params.output_stop`, each converted back to `params.tz`
/// and carrying the `all_day` flag of the event's own DTSTART.
///
/// NOTE(review): the expansion is requested with `all(10)`, so at most 10 occurrences come back
/// per event and any further ones in the window are dropped without notice. Confirm that is
/// intended.
///
/// # Errors
/// Fails if the event has no DTSTART, if DTSTART can't be normalized, or if the rule can't be
/// parsed or built.
fn recurring_dates_opt(
    params: &Parameters,
    ev: &icalendar::Event,
    rrule: &icalendar::Property,
) -> Result<Option<impl Iterator<Item = DatePerhapsTime>>> {
    let first_start = ev
        .get_start()
        .context("Data error - Event has no DTSTART")?;
    let all_day = matches!(first_start, icalendar::DatePerhapsTime::Date(_));
    let first_start_norm = normalize_date_perhaps_time(&first_start, params.tz)?;

    let unvalidated = rrule::RRule::from_str(rrule.value())
        .with_context(|| format!("RRule parse failed `{}`", rrule.value()))?;

    // This skips over some bad data in our test set where we fail to parse a recurring event
    // that's already ended before our output window starts
    let ended_before_window = unvalidated
        .get_until()
        .is_some_and(|until| *until < params.output_start);
    if ended_before_window {
        return Ok(None);
    }

    // The rrule crate works in its own timezone type, so convert on the way in and out
    let rrule_tz: rrule::Tz = params.tz.into();
    let window_start = params.output_start.with_timezone(&rrule_tz);
    let window_stop = params.output_stop.with_timezone(&rrule_tz);

    let rule_set = unvalidated.build(first_start_norm.dt.with_timezone(&rrule_tz))?;
    let occurrences = rule_set
        .after(window_start)
        .before(window_stop)
        .all(10)
        .dates;

    Ok(Some(occurrences.into_iter().map(move |occurrence| {
        DatePerhapsTime {
            dt: occurrence.with_timezone(&params.tz),
            all_day,
        }
    })))
}
/// Same as `recurring_dates_opt`, except that a skipped rule yields an empty iterator instead
/// of `None`.
///
/// # Errors
/// Propagates any error from `recurring_dates_opt`.
fn recurring_dates(
    params: &Parameters,
    ev: &icalendar::Event,
    rrule: &icalendar::Property,
) -> Result<impl Iterator<Item = DatePerhapsTime>> {
    let maybe_dates = recurring_dates_opt(params, ev, rrule)?;
    // Iterating an `Option` yields zero or one inner iterators; flattening splices them in
    Ok(maybe_dates.into_iter().flatten())
}
/// Builds a `https://www.google.com/calendar/event?eid=…` deep link for one event occurrence.
///
/// The `eid` is the unpadded URL-safe base64 of `"<uid prefix> <google_id>"`, where the UID
/// prefix is everything before the first `@` or `_`. For recurring events (`has_rrule`) the
/// occurrence's UTC start time is appended to the prefix after an underscore.
///
/// Returns `Ok(None)` for UIDs longer than 100 bytes, which we don't know how to link.
///
/// # Errors
/// Fails if `uid` is `None`.
fn google_url(
    dtstart: DatePerhapsTime,
    has_rrule: bool,
    uid: Option<&str>,
    google_id: &str,
) -> Result<Option<String>> {
    let uid = uid.context("No UID")?;
    if uid.len() > 100 {
        // There's one event in one of our test Google calendars which originates from Microsoft
        // Exchange and has a totally different UID format from any other event. I was not able
        // to reverse it, so I'm skipping it for now.
        return Ok(None);
    }

    // Keep only the front part of the Google UID, up to the first `@` or `_`
    let uid_prefix = uid.split(['@', '_']).next().unwrap_or(uid);

    let eid_plain = if has_rrule {
        // Recurring events have an extra timestamp in their base64 to disambiguate occurrences
        let occurrence_utc = dtstart
            .dt
            .with_timezone(&chrono_tz::UTC)
            .format("%Y%m%dT%H%M%SZ");
        format!("{uid_prefix}_{occurrence_utc} {google_id}")
    } else {
        format!("{uid_prefix} {google_id}")
    };
    let eid = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&eid_plain);

    let mut link = url::Url::parse("https://www.google.com/calendar/event").unwrap();
    link.query_pairs_mut().append_pair("eid", &eid);
    Ok(Some(link.to_string()))
}
fn ical_event_instances(
config_ical: &Config,
params: &Parameters,
ev: &icalendar::Event,
) -> Result<Vec<EventInstance>> {
let dates = if let Some(rrule) = ev.properties().get("RRULE") {
recurring_dates(params, ev, rrule)?.collect()
} else {
// Event that occurs once
let dtstart = ev.get_start().context("Data error - Event has no start")?;
let dtstart_normalized = normalize_date_perhaps_time(&dtstart, params.tz)?;
if dtstart_normalized.dt < params.output_start || dtstart_normalized.dt > params.output_stop
{
return Ok(vec![]);
}
vec![dtstart_normalized]
};
let instances = dates
.into_iter()
.map(|dtstart| {
let has_rrule = ev.properties().get("RRULE").is_some();
let uid = ev.get_uid().map(|s| s.to_string());
let url = if let Some(url) = ev.get_url() {
Some(url.to_string())
} else if let Some(google_id) = &config_ical.google_id {
google_url(dtstart, has_rrule, uid.as_deref(), google_id)?
} else {
None
};
Ok::<_, anyhow::Error>(EventInstance {
calendar_ui: config_ical.ui.clone(),
dtstart,
location: ev.get_location().map(|s| s.to_string()),
recurrence_id: ev.get_recurrence_id(),
summary: ev.get_summary().map(|s| s.to_string()),
uid,
url,
})
})
.collect();
instances
}
impl Calendar {
pub(crate) fn read_from_str(config: Config, s: &str) -> Result<Self> {
let cal = s.parse().map_err(|s| anyhow!("parse error {s}"))?;
let cal = Self { cal, config };
Ok(cal)
}
pub(crate) fn read_from_downloadable(config: Config) -> Result<Self> {
let s = std::fs::read_to_string(&config.dl.file_path)?;
Self::read_from_str(config, &s)
}
fn events(&self) -> impl Iterator<Item = &icalendar::Event> {
self.cal.components.iter().filter_map(|comp| {
if let icalendar::CalendarComponent::Event(ev) = comp {
Some(ev)
} else {
None
}
})
}
/// Returns an unsorted list of event instances for this calendar
pub(crate) fn event_instances(&self, params: &Parameters) -> Result<Vec<EventInstance>> {
let mut instances = vec![];
let mut recurrence_exceptions = BTreeSet::new();
for ev in self.events() {
let eis = match ical_event_instances(&self.config, params, ev)
.with_context(|| format!("Failed to process event with UID '{:?}'", ev.get_uid()))
{
Ok(x) => x,
Err(e) => {
if ev.get_last_modified().context("Event has no timestamp")?
< params.ignore_before
{
tracing::warn!("Ignoring error from very old event {e:?}");
continue;
} else {
Err(e)?
}
}
};
for ei in eis {
instances.push(ei);
}
if let Some(recurrence_id) = ev.get_recurrence_id() {
// This is a recurrence exception and we must handle it specially by later deleting the original event it replaces
let recurrence_id = normalize_date_perhaps_time(&recurrence_id, params.tz)
.context("We should be able to normalize recurrence IDs")?;
let uid = ev
.get_uid()
.context("Every recurrence exception should have a UID")?;
recurrence_exceptions.insert(RecurrenceKey { recurrence_id, uid });
}
}
// Find all recurring events that are replaced with recurrence exceptions and delete the originals.
// There is probably a not-linear-time way to do this, but this should be fine.
instances.retain(|ev| {
if ev.recurrence_id.is_some() {
// This is a recurrence exception, exceptions never delete themselves
return true;
}
let Some(uid) = &ev.uid else {
// If there's no UID, we can't apply recurrence exceptions
return true;
};
let key = RecurrenceKey {
recurrence_id: ev.dtstart,
uid,
};
!recurrence_exceptions.contains(&key)
});
Ok(instances)
}
}