stargazer/tier1/ircddb.rs
1//! ircDDB last-heard HTML scraper.
2//!
3//! Scrapes `https://status.ircddb.net/cgi-bin/ircddb-log?30 0` — an HTML page
4//! listing recent D-STAR activity across the global ircDDB network. Each table
5//! row represents one heard transmission, including the operator callsign,
6//! repeater routing fields, and timestamp.
7//!
8//! Unlike the Pi-Star and XLX API fetchers which primarily discover
9//! *reflectors*, the ircDDB scraper discovers *activity*: which callsigns are
10//! transmitting through which reflectors. This activity data drives Tier 2
11//! monitoring decisions — reflectors with recent ircDDB activity are prioritised
12//! for live monitoring.
13//!
14//! **Poll interval:** every 60 seconds (default). The ircDDB last-heard page
15//! refreshes frequently, and activity data is time-sensitive — stale
16//! observations lose value quickly for prioritisation.
17//!
18//! # HTML Structure
19//!
20//! The page contains an HTML `<table>` where each `<tr>` after the header row
21//! has columns:
22//!
23//! | Index | Column | Example |
24//! |-------|--------|---------|
25//! | 0 | Date/time (UTC) | `2024-01-15 14:30:00` |
26//! | 1 | Callsign | `W1AW` |
27//! | 2 | ID (suffix) | `D75` |
28//! | 3 | Rptr1 | `W1AW B` |
29//! | 4 | Rptr2 | `REF001 B` |
30//! | 5 | `UrCall` | `CQCQCQ` |
31//! | 6 | Dest Rptr | `REF001 B` |
32//! | 7 | TX-Message | `Hello` |
33//!
34//! The Rptr2 and Dest Rptr fields contain the reflector callsign (first 6-7
35//! characters) and module letter. We extract the reflector callsign from the
36//! Dest Rptr field (column 6) which indicates the intended destination.
37
38use chrono::Utc;
39use scraper::{Html, Selector};
40
41use super::error::FetchError;
42use crate::db;
43
/// ircDDB last-heard page URL.
///
/// The query string is `30 0`, written here with the space percent-encoded as
/// `%20`. It requests the last 30 minutes of activity starting from offset 0.
// NOTE(review): the meaning of the two query values is defined by the remote
// CGI script and cannot be confirmed from this file.
const IRCDDB_URL: &str = "https://status.ircddb.net/cgi-bin/ircddb-log?30%200";
49
/// Minimum number of `<td>` cells a row must contain to be treated as data.
///
/// The parser reads cells up to index 6 (Dest Rptr), so at least 7 cells are
/// required. Rows with fewer cells are skipped — they are likely header rows,
/// separator rows, or malformed entries.
const MIN_COLUMNS: usize = 7;
55
/// A parsed activity observation extracted from one HTML table row.
///
/// Every field is owned (or `'static`), so a value carries no borrow of the
/// parsed HTML document. This lets the parser collect observations into a
/// `Vec` synchronously and hand them to the asynchronous database phase
/// without keeping non-`Send` scraper types alive across an `.await`.
///
/// The derives make observations loggable with `{:?}` and directly comparable
/// in unit tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Observation {
    /// Operator callsign from column 1.
    callsign: String,
    /// Reflector callsign extracted from the Dest Rptr field (column 6).
    reflector: String,
    /// Module letter (A-Z) from the Dest Rptr field, if present.
    module: Option<String>,
    /// Protocol inferred from the reflector callsign prefix.
    protocol: &'static str,
}
71
72/// Fetches the ircDDB last-heard page and inserts activity observations.
73///
74/// Returns the number of observations successfully inserted. Rows that cannot
75/// be parsed (missing columns, empty callsign, unrecognisable reflector) are
76/// skipped with a debug log rather than failing the entire scrape.
77///
78/// # Errors
79///
80/// - [`FetchError::Http`] if the HTTP request fails.
81/// - [`FetchError::Html`] if the page contains no recognisable `<table>`.
82/// - [`FetchError::Database`] if a database insert fails.
83///
84/// # HTML parsing notes
85///
86/// The scraper is written against the expected standard `<table>/<tr>/<td>`
87/// structure. If the ircDDB site changes its layout, this function will return
88/// 0 observations (not an error) and log a warning — the data simply becomes
89/// stale until the scraper is updated.
90pub(crate) async fn fetch_and_store(
91 client: &reqwest::Client,
92 pool: &sqlx::PgPool,
93) -> Result<usize, FetchError> {
94 let body = client.get(IRCDDB_URL).send().await?.text().await?;
95
96 // Phase 1: parse HTML synchronously — scraper types are not Send, so all
97 // DOM traversal must complete before the first await point after this block.
98 let observations = parse_observations(&body)?;
99
100 // Phase 2: write parsed observations to the database.
101 let now = Utc::now();
102 let mut count = 0usize;
103
104 for obs in &observations {
105 // The activity_log table has a foreign key to reflectors.callsign, so
106 // we upsert a minimal reflector entry first to satisfy the constraint.
107 db::reflectors::upsert(pool, &obs.reflector, obs.protocol, None, None, None).await?;
108
109 db::activity::insert_observation(
110 pool,
111 &obs.reflector,
112 obs.module.as_deref(),
113 &obs.callsign,
114 "ircddb",
115 now,
116 )
117 .await?;
118
119 count += 1;
120 }
121
122 if count == 0 {
123 tracing::warn!("ircddb: scraped 0 activity observations — page layout may have changed");
124 } else {
125 tracing::info!(count, "ircddb: inserted activity observations");
126 }
127
128 Ok(count)
129}
130
131/// Parses the HTML body into a vector of owned [`Observation`] values.
132///
133/// All DOM traversal happens here, synchronously, so that the non-`Send`
134/// scraper types do not live across any `.await` boundaries.
135fn parse_observations(body: &str) -> Result<Vec<Observation>, FetchError> {
136 let document = Html::parse_document(body);
137
138 // Build CSS selectors for the table structure.
139 let table_sel =
140 Selector::parse("table").map_err(|e| FetchError::Html(format!("bad selector: {e}")))?;
141 let row_sel =
142 Selector::parse("tr").map_err(|e| FetchError::Html(format!("bad selector: {e}")))?;
143 let cell_sel =
144 Selector::parse("td").map_err(|e| FetchError::Html(format!("bad selector: {e}")))?;
145
146 // Find the first <table> on the page — the ircDDB log is the primary table.
147 let table = document
148 .select(&table_sel)
149 .next()
150 .ok_or_else(|| FetchError::Html("no <table> element found on page".to_owned()))?;
151
152 let mut observations = Vec::new();
153
154 for row in table.select(&row_sel) {
155 let cells: Vec<String> = row
156 .select(&cell_sel)
157 .map(|td| td.text().collect::<String>().trim().to_owned())
158 .collect();
159
160 // Skip header rows and malformed rows with insufficient columns.
161 if cells.len() < MIN_COLUMNS {
162 continue;
163 }
164
165 // Column 1: operator callsign.
166 let Some(callsign) = cells.get(1) else {
167 continue;
168 };
169 if callsign.is_empty() {
170 continue;
171 }
172
173 // Column 6: Dest Rptr — contains reflector callsign + module letter.
174 // Format is typically "REF001 B" (callsign padded to 7 chars + space
175 // + module).
176 let Some(dest_rptr) = cells.get(6) else {
177 continue;
178 };
179 let (reflector, module) = parse_rptr_field(dest_rptr);
180
181 // Skip rows with no recognisable reflector destination.
182 let Some(reflector) = reflector else {
183 continue;
184 };
185
186 let protocol = infer_protocol(&reflector);
187
188 observations.push(Observation {
189 callsign: callsign.clone(),
190 reflector,
191 module,
192 protocol,
193 });
194 }
195
196 Ok(observations)
197}
198
/// Splits a repeater field such as `"REF001 B"` into a callsign and an
/// optional module letter.
///
/// The field is trimmed first. If it then contains a space, the text before
/// the last space (trimmed) is the callsign and the text after it (trimmed)
/// is the module candidate. The candidate is accepted only when it is exactly
/// one ASCII uppercase letter; otherwise the module is `None` and the
/// candidate text is discarded. A field without any space is returned whole
/// as the callsign with no module.
///
/// Returns `(None, None)` only when the field is empty or all whitespace.
fn parse_rptr_field(field: &str) -> (Option<String>, Option<String>) {
    let field = field.trim();
    if field.is_empty() {
        return (None, None);
    }

    // Split at the last space; discard the split if nothing usable precedes
    // the separator, so the whole field is used as the callsign instead.
    let split = field
        .rsplit_once(' ')
        .map(|(head, tail)| (head.trim(), tail.trim()))
        .filter(|(head, _)| !head.is_empty());

    match split {
        Some((callsign, tail)) => {
            // A module is exactly one byte, and that byte is A-Z.
            let is_module = matches!(tail.as_bytes(), [letter] if letter.is_ascii_uppercase());
            let module = is_module.then(|| tail.to_owned());
            (Some(callsign.to_owned()), module)
        }
        None => (Some(field.to_owned()), None),
    }
}
236
/// Maps a reflector callsign to a D-STAR protocol name by its 3-letter prefix.
///
/// | Prefix | Protocol |
/// |--------|----------|
/// | `REF` | `"dplus"` |
/// | `DCS` | `"dcs"` |
/// | `XRF`, `XLX` | `"dextra"` |
/// | anything else | `"dextra"` |
///
/// The comparison is case-sensitive. Callsigns shorter than three bytes fall
/// into the default.
fn infer_protocol(callsign: &str) -> &'static str {
    // `get(..3)` is `None` for short input or a non-boundary cut, both of
    // which can never equal an ASCII prefix and so take the default arm.
    match callsign.get(..3) {
        Some("REF") => "dplus",
        Some("DCS") => "dcs",
        // XRF, XLX and every unknown prefix: dextra is the default, as it is
        // the most common protocol for non-standard reflectors.
        _ => "dextra",
    }
}