commit 6436654c67889ef2154e666bca85e79e480a68d8 from: Romain VINCENT date: Sun Jan 18 09:55:56 2026 UTC Add number to Section model and modified SectionParser accordingly. commit - 6cfd378669b68e2439bb02e8421310d902dca804 commit + 6436654c67889ef2154e666bca85e79e480a68d8 blob - e505b7f9f73ef8f4ada2a76cc4741d178e5019f3 blob + 94274aaac69f160afe7573495bb90883e31f1c91 --- eur-lex-scraper/src/models/section.rs +++ eur-lex-scraper/src/models/section.rs @@ -3,6 +3,7 @@ use crate::models::{articles::Article, enacting_terms: #[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct Section { id: String, + number: u32, title: String, items: Vec
, } @@ -29,6 +30,12 @@ impl Section { pub fn get_id(&self) -> &str { &self.id } + pub fn set_number(&mut self, number: u32) { + self.number = number; + } + pub fn get_number(&self) -> u32 { + self.number + } pub fn set_title(&mut self, title: String) { self.title = title; } blob - 4a66f70c66e89d36e60bbfac4f79d562addd26de blob + fc3c38861494e1e0faf715a29326a6b0cd9db345 --- eur-lex-scraper/src/parsers/section.rs +++ eur-lex-scraper/src/parsers/section.rs @@ -10,6 +10,8 @@ pub struct SectionParser {} pub enum SectionParserError { #[error("error while parsing unique id")] UniqueIdError, + #[error("error while parsing section number")] + NoNumber, #[error("error while parsing section")] GenericError, #[error("error while parsing the title")] @@ -37,6 +39,22 @@ impl SectionParser { None => return Err(SectionParserError::UniqueIdError), }; section.set_id(id); + // number selection + let id = match element.attr("id") { + Some(id) => id.to_string(), + None => return Err(SectionParserError::NoNumber), + }; + let number_str: Vec<&str> = id.split('.').collect(); + let number_str = match number_str.get(1) { + Some(number_str) => number_str, + None => return Err(SectionParserError::NoNumber), + }; + let number_str = number_str.replace("sct_", ""); + let number: u32 = match number_str.parse() { + Ok(number) => number, + Err(_) => return Err(SectionParserError::NoNumber), + }; + section.set_number(number); // This class should appear only once per section let section_title_selector = Selector::parse(r#".oj-ti-section-2"#).unwrap(); let title = match element.select(&section_title_selector).next() { @@ -79,6 +97,7 @@ mod tests { "Classification of AI systems as high-risk" ); assert_eq!(section_left.get_id(), "d1e3012-1-1"); + assert_eq!(section_left.get_number(), 1); assert_eq!(section_left.len(), 2); assert_eq!(section_left.get(0).unwrap().get_number(), 6); assert_eq!(