Commit Diff


commit - a85d745221f8b5a40c756c3ab8a60e8e49f3d2ca
commit + 6a644cebe360b81e85ca7efb761709257139caed
blob - 67482e9de4b056003fe953336d319b19e7b00997
blob + f0222648eb27d3bf21d1157bb87a81e269c94ae4
--- eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs
+++ eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs
@@ -2,6 +2,7 @@ use scraper::{ElementRef, Selector};
 use thiserror::Error;
 
 use crate::models::enacting_terms::{Chapter, EnactingTerms};
+use crate::parsers::article::{ArticleParser, ArticleParserError};
 use crate::parsers::section::{SectionParser, SectionParserError};
 
 pub struct EnactingTermParser {}
@@ -37,7 +38,7 @@ pub enum ChapterParserError {
     #[error("error while parsing section")]
     SectionError(SectionParserError),
     #[error("error while parsing article")]
-    ArticleError(),
+    ArticleError(ArticleParserError),
 }
 
 impl From<SectionParserError> for ChapterParserError {
@@ -46,20 +47,30 @@ impl From<SectionParserError> for ChapterParserError {
     }
 }
 
+impl From<ArticleParserError> for ChapterParserError {
+    fn from(article_error: ArticleParserError) -> Self {
+        Self::ArticleError(article_error)
+    }
+}
+
 impl ChapterParser {
     pub fn parse(element: ElementRef) -> Result<Chapter, ChapterParserError> {
         let mut chapter = Chapter::default();
-        // Since we are in a chapter already, cpt_ should refer to section
-        let section_selector = Selector::parse("[id^=cpt_]").unwrap();
+        let section_selector =
+            Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])"#).unwrap();
         let section_count = element.select(&section_selector).count();
+        // Parse the sections when any match; otherwise parse the articles found in the chapter.
         if section_count > 0 {
             for section in element.select(&section_selector) {
                 let section = SectionParser::parse(section)?;
                 chapter.push(section);
             }
         } else {
-            // should parse article
-            todo!()
+            let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap();
+            for article in element.select(&article_selector) {
+                let article = ArticleParser::parse(article)?;
+                chapter.push(article);
+            }
         }
         Ok(chapter)
     }