stargazer/tier1/
ircddb.rs

1//! ircDDB last-heard HTML scraper.
2//!
3//! Scrapes `https://status.ircddb.net/cgi-bin/ircddb-log?30 0` — an HTML page
4//! listing recent D-STAR activity across the global ircDDB network. Each table
5//! row represents one heard transmission, including the operator callsign,
6//! repeater routing fields, and timestamp.
7//!
8//! Unlike the Pi-Star and XLX API fetchers which primarily discover
9//! *reflectors*, the ircDDB scraper discovers *activity*: which callsigns are
10//! transmitting through which reflectors. This activity data drives Tier 2
11//! monitoring decisions — reflectors with recent ircDDB activity are prioritised
12//! for live monitoring.
13//!
14//! **Poll interval:** every 60 seconds (default). The ircDDB last-heard page
15//! refreshes frequently, and activity data is time-sensitive — stale
16//! observations lose value quickly for prioritisation.
17//!
18//! # HTML Structure
19//!
20//! The page contains an HTML `<table>` where each `<tr>` after the header row
21//! has columns:
22//!
23//! | Index | Column | Example |
24//! |-------|--------|---------|
25//! | 0 | Date/time (UTC) | `2024-01-15 14:30:00` |
26//! | 1 | Callsign | `W1AW` |
27//! | 2 | ID (suffix) | `D75` |
28//! | 3 | Rptr1 | `W1AW  B` |
29//! | 4 | Rptr2 | `REF001 B` |
30//! | 5 | `UrCall` | `CQCQCQ` |
31//! | 6 | Dest Rptr | `REF001 B` |
32//! | 7 | TX-Message | `Hello` |
33//!
34//! The Rptr2 and Dest Rptr fields contain the reflector callsign (first 6-7
35//! characters) and module letter. We extract the reflector callsign from the
36//! Dest Rptr field (column 6) which indicates the intended destination.
37
38use chrono::Utc;
39use scraper::{Html, Selector};
40
41use super::error::FetchError;
42use crate::db;
43
/// ircDDB last-heard page URL.
///
/// The `30 0` suffix requests the last 30 minutes of activity starting from
/// offset 0. The space between the two query values is percent-encoded as
/// `%20` in the literal below.
const IRCDDB_URL: &str = "https://status.ircddb.net/cgi-bin/ircddb-log?30%200";
49
/// Minimum number of columns expected in each data row.
///
/// Rows with fewer columns are skipped — they are likely header rows, separator
/// rows, or malformed entries.
///
/// The highest column index read by `parse_observations` is 6 (Dest Rptr), so
/// seven cells is the smallest row from which an observation can be built.
const MIN_COLUMNS: usize = 7;
55
/// A parsed activity observation extracted from one HTML table row.
///
/// Every field is owned (or `'static`), so a `Vec<Observation>` carries no
/// borrow of the parsed HTML document. This lets all DOM traversal finish
/// synchronously before the database writes begin, keeping the non-`Send`
/// scraper types away from `.await` points.
///
/// `Debug`, `Clone`, `PartialEq` and `Eq` are derived so observations can be
/// logged and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Observation {
    /// Operator callsign from column 1.
    callsign: String,
    /// Reflector callsign extracted from the Dest Rptr field (column 6).
    reflector: String,
    /// Module letter (A-Z) from the Dest Rptr field, if present.
    module: Option<String>,
    /// Protocol inferred from the reflector callsign prefix.
    protocol: &'static str,
}
71
72/// Fetches the ircDDB last-heard page and inserts activity observations.
73///
74/// Returns the number of observations successfully inserted. Rows that cannot
75/// be parsed (missing columns, empty callsign, unrecognisable reflector) are
76/// skipped with a debug log rather than failing the entire scrape.
77///
78/// # Errors
79///
80/// - [`FetchError::Http`] if the HTTP request fails.
81/// - [`FetchError::Html`] if the page contains no recognisable `<table>`.
82/// - [`FetchError::Database`] if a database insert fails.
83///
84/// # HTML parsing notes
85///
86/// The scraper is written against the expected standard `<table>/<tr>/<td>`
87/// structure. If the ircDDB site changes its layout, this function will return
88/// 0 observations (not an error) and log a warning — the data simply becomes
89/// stale until the scraper is updated.
90pub(crate) async fn fetch_and_store(
91    client: &reqwest::Client,
92    pool: &sqlx::PgPool,
93) -> Result<usize, FetchError> {
94    let body = client.get(IRCDDB_URL).send().await?.text().await?;
95
96    // Phase 1: parse HTML synchronously — scraper types are not Send, so all
97    // DOM traversal must complete before the first await point after this block.
98    let observations = parse_observations(&body)?;
99
100    // Phase 2: write parsed observations to the database.
101    let now = Utc::now();
102    let mut count = 0usize;
103
104    for obs in &observations {
105        // The activity_log table has a foreign key to reflectors.callsign, so
106        // we upsert a minimal reflector entry first to satisfy the constraint.
107        db::reflectors::upsert(pool, &obs.reflector, obs.protocol, None, None, None).await?;
108
109        db::activity::insert_observation(
110            pool,
111            &obs.reflector,
112            obs.module.as_deref(),
113            &obs.callsign,
114            "ircddb",
115            now,
116        )
117        .await?;
118
119        count += 1;
120    }
121
122    if count == 0 {
123        tracing::warn!("ircddb: scraped 0 activity observations — page layout may have changed");
124    } else {
125        tracing::info!(count, "ircddb: inserted activity observations");
126    }
127
128    Ok(count)
129}
130
131/// Parses the HTML body into a vector of owned [`Observation`] values.
132///
133/// All DOM traversal happens here, synchronously, so that the non-`Send`
134/// scraper types do not live across any `.await` boundaries.
135fn parse_observations(body: &str) -> Result<Vec<Observation>, FetchError> {
136    let document = Html::parse_document(body);
137
138    // Build CSS selectors for the table structure.
139    let table_sel =
140        Selector::parse("table").map_err(|e| FetchError::Html(format!("bad selector: {e}")))?;
141    let row_sel =
142        Selector::parse("tr").map_err(|e| FetchError::Html(format!("bad selector: {e}")))?;
143    let cell_sel =
144        Selector::parse("td").map_err(|e| FetchError::Html(format!("bad selector: {e}")))?;
145
146    // Find the first <table> on the page — the ircDDB log is the primary table.
147    let table = document
148        .select(&table_sel)
149        .next()
150        .ok_or_else(|| FetchError::Html("no <table> element found on page".to_owned()))?;
151
152    let mut observations = Vec::new();
153
154    for row in table.select(&row_sel) {
155        let cells: Vec<String> = row
156            .select(&cell_sel)
157            .map(|td| td.text().collect::<String>().trim().to_owned())
158            .collect();
159
160        // Skip header rows and malformed rows with insufficient columns.
161        if cells.len() < MIN_COLUMNS {
162            continue;
163        }
164
165        // Column 1: operator callsign.
166        let Some(callsign) = cells.get(1) else {
167            continue;
168        };
169        if callsign.is_empty() {
170            continue;
171        }
172
173        // Column 6: Dest Rptr — contains reflector callsign + module letter.
174        // Format is typically "REF001 B" (callsign padded to 7 chars + space
175        // + module).
176        let Some(dest_rptr) = cells.get(6) else {
177            continue;
178        };
179        let (reflector, module) = parse_rptr_field(dest_rptr);
180
181        // Skip rows with no recognisable reflector destination.
182        let Some(reflector) = reflector else {
183            continue;
184        };
185
186        let protocol = infer_protocol(&reflector);
187
188        observations.push(Observation {
189            callsign: callsign.clone(),
190            reflector,
191            module,
192            protocol,
193        });
194    }
195
196    Ok(observations)
197}
198
/// Parses an RPT field (e.g. `"REF001 B"`) into a reflector callsign and
/// optional module letter.
///
/// The field is split at its last whitespace character. Any Unicode
/// whitespace counts as a separator (space, tab, newline, non-breaking
/// space), because the text comes from scraped HTML where padding is not
/// guaranteed to be a plain ASCII space.
///
/// # Returns
///
/// - `(None, None)` if the field is empty or whitespace-only.
/// - `(Some(callsign), Some(module))` if the text after the last whitespace
///   is exactly one ASCII uppercase letter (A-Z).
/// - `(Some(callsign), None)` if there is no whitespace at all (the whole
///   field is the callsign), or if the trailing part is not a valid module
///   letter (that trailing part is discarded).
fn parse_rptr_field(field: &str) -> (Option<String>, Option<String>) {
    let trimmed = field.trim();
    if trimmed.is_empty() {
        return (None, None);
    }

    // No separator at all — treat the entire field as a callsign.
    let Some((head, tail)) = trimmed.rsplit_once(char::is_whitespace) else {
        return (Some(trimmed.to_owned()), None);
    };

    // `trimmed` starts and ends with non-whitespace, so `head` and `tail` are
    // both non-empty. Only the padding between callsign and module (e.g. the
    // double space in "W1AW  B") has to be stripped from `head`.
    let callsign = head.trim();

    // Module must be a single uppercase letter A-Z.
    let is_module = tail.len() == 1 && tail.bytes().all(|b| b.is_ascii_uppercase());
    let module = is_module.then(|| tail.to_owned());

    (Some(callsign.to_owned()), module)
}
236
/// Maps a reflector callsign to a D-STAR protocol name by its first three
/// characters.
///
/// | Prefix | Protocol |
/// |--------|----------|
/// | `REF` | `"dplus"` |
/// | `DCS` | `"dcs"` |
/// | `XRF`, `XLX`, or anything else | `"dextra"` |
///
/// Matching is case-sensitive. Callsigns shorter than three bytes fall
/// through to the `"dextra"` default.
fn infer_protocol(callsign: &str) -> &'static str {
    // `get(..3)` is `None` for short input or when byte 3 is not a char
    // boundary; neither case can start with a three-letter ASCII prefix.
    match callsign.get(..3) {
        Some("REF") => "dplus",
        Some("DCS") => "dcs",
        // XRF and XLX are dextra, and dextra is also the fallback for unknown
        // prefixes, so a single arm covers all of them.
        _ => "dextra",
    }
}