commit d0d0a0fa30a225f99d9367e5bf1187f047186535 from: Romain VINCENT date: Fri Jan 16 07:23:18 2026 UTC Make ChapterParser a separate module. commit - 97618e6ac9f8771f0da9e3dedc2a385983d1cb0f commit + d0d0a0fa30a225f99d9367e5bf1187f047186535 blob - 8cd11075c2c7fd3c11a88b0a0eb9cda1c364dae0 blob + d015548f1560d9974dd899631dbba2f779260023 --- eur-lex-scraper/src/parsers/enacting_terms_parser.rs +++ eur-lex-scraper/src/parsers/enacting_terms_parser.rs @@ -1,9 +1,9 @@ use scraper::{ElementRef, Selector}; use thiserror::Error; -use crate::models::enacting_terms::{Chapter, EnactingTerms}; +use crate::models::enacting_terms::EnactingTerms; use crate::parsers::article::{ArticleParser, ArticleParserError}; -use crate::parsers::section::{SectionParser, SectionParserError}; +use crate::parsers::chapter::{ChapterParser, ChapterParserError}; pub struct EnactingTermParser {} @@ -51,51 +51,3 @@ impl EnactingTermParser { Ok(enacting_terms) } } - -pub struct ChapterParser {} - -#[derive(Error, Debug, PartialEq, PartialOrd)] -pub enum ChapterParserError { - #[error("error while parsing chapter")] - GenericError, - #[error("error while parsing section")] - SectionError(SectionParserError), - #[error("error while parsing article")] - ArticleError(ArticleParserError), -} - -impl From<SectionParserError> for ChapterParserError { - fn from(value: SectionParserError) -> Self { - ChapterParserError::SectionError(value) - } -} - -impl From<ArticleParserError> for ChapterParserError { - fn from(value: ArticleParserError) -> Self { - ChapterParserError::ArticleError(value) - } -} - -impl ChapterParser { - pub fn parse(element: ElementRef) -> Result<Chapter, ChapterParserError> { - let mut chapter = Chapter::default(); - let section_selector = - Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])}"#).unwrap(); - let section_count = element.select(&section_selector).count(); - // If there are sections, parse them. Otherwise, it must be articles. 
- if section_count > 0 { - for section in element.select(&section_selector) { - let section = SectionParser::parse(section)?; - chapter.push(section); - } - } else { - let article_selector = - Selector::parse(r#"[id^="cpt_"][id*="art_"]:not([id*=".tit"])"#).unwrap(); - for article in element.select(&article_selector) { - let article = ArticleParser::parse(article)?; - chapter.push(article); - } - } - Ok(chapter) - } -} blob - /dev/null blob + 85aa9edf97b6993d072ce51b24010110ef6d1c2d (mode 644) --- /dev/null +++ eur-lex-scraper/src/parsers/chapter.rs @@ -0,0 +1,54 @@ +use scraper::{ElementRef, Selector}; +use thiserror::Error; + +use crate::models::enacting_terms::Chapter; +use crate::parsers::article::{ArticleParser, ArticleParserError}; +use crate::parsers::section::{SectionParser, SectionParserError}; + +pub struct ChapterParser {} + +#[derive(Error, Debug, PartialEq, PartialOrd)] +pub enum ChapterParserError { + #[error("error while parsing chapter")] + GenericError, + #[error("error while parsing section")] + SectionError(SectionParserError), + #[error("error while parsing article")] + ArticleError(ArticleParserError), +} + +impl From<SectionParserError> for ChapterParserError { + fn from(value: SectionParserError) -> Self { + ChapterParserError::SectionError(value) + } +} + +impl From<ArticleParserError> for ChapterParserError { + fn from(value: ArticleParserError) -> Self { + ChapterParserError::ArticleError(value) + } +} + +impl ChapterParser { + pub fn parse(element: ElementRef) -> Result<Chapter, ChapterParserError> { + let mut chapter = Chapter::default(); + let section_selector = + Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])}"#).unwrap(); + let section_count = element.select(&section_selector).count(); + // If there are sections, parse them. Otherwise, it must be articles. 
+ if section_count > 0 { + for section in element.select(&section_selector) { + let section = SectionParser::parse(section)?; + chapter.push(section); + } + } else { + let article_selector = + Selector::parse(r#"[id^="cpt_"][id*="art_"]:not([id*=".tit"])"#).unwrap(); + for article in element.select(&article_selector) { + let article = ArticleParser::parse(article)?; + chapter.push(article); + } + } + Ok(chapter) + } +} blob - 81f83d067a5a4809cd1127b461abafba3af13bb2 blob + 4d1edacb2dad408c93395f9ed037c4148b25711c --- eur-lex-scraper/src/parsers/mod.rs +++ eur-lex-scraper/src/parsers/mod.rs @@ -1,6 +1,7 @@ pub mod act_parser; pub mod act_title_parser; pub mod article; +pub mod chapter; pub mod enacting_terms_parser; pub mod preamble_item_parser; pub mod preamble_parser;