R VINCENT Repositories

Commit Diff

Commit: 5e29929b3959d317321b463d031ce893f92c8c61
From: Romain VINCENT <contact@rvincent.eu>
Date: Sat Jan 10 14:45:10 2026 UTC
Message: Implement section parser.
Actions: Patch | Tree
commit - 6a3f14362066a116d40e8e68249bb6611aac5847
commit + 5e29929b3959d317321b463d031ce893f92c8c61
blob - 15c941f6a64b1aca3a0d207f5af15de9656aa85a
blob + aecc33e03d3cc640dd0e922a36574765f9ef04d7
--- eur-lex-scraper-naive/src/parsers/article.rs
+++ eur-lex-scraper-naive/src/parsers/article.rs
@@ -158,7 +158,7 @@ mod tests {
     }
 
     #[test]
-    fn item_parsing_citation() {
+    fn parsing_article() {
         let html = Html::parse_fragment(&get_article1_html());
         let selector = Selector::parse("[id^=art_]").unwrap();
         let element_ref = html.select(&selector).next().unwrap();
blob - 4da206bae0b7e09bcf6f92c6c012c644330f763d
blob + 67482e9de4b056003fe953336d319b19e7b00997
--- eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs
+++ eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs
@@ -1,8 +1,8 @@
 use scraper::{ElementRef, Selector};
 use thiserror::Error;
 
-use crate::models::enacting_terms::{Chapter, EnactingTerms, Section};
-use crate::parsers::article::{ArticleParser, ArticleParserError};
+use crate::models::enacting_terms::{Chapter, EnactingTerms};
+use crate::parsers::section::{SectionParser, SectionParserError};
 
 pub struct EnactingTermParser {}
 
@@ -64,31 +64,3 @@ impl ChapterParser {
         Ok(chapter)
     }
 }
-
-pub struct SectionParser {}
-
-#[derive(Error, Debug, PartialEq, PartialOrd)]
-pub enum SectionParserError {
-    #[error("error while parsing section")]
-    GenericError,
-    #[error("error parsing article")]
-    ArticleError(ArticleParserError),
-}
-
-impl From<ArticleParserError> for SectionParserError {
-    fn from(value: ArticleParserError) -> Self {
-        Self::ArticleError(value)
-    }
-}
-
-impl SectionParser {
-    pub fn parse(element: ElementRef) -> Result<Section, SectionParserError> {
-        let mut section = Section::default();
-        let article_selector = Selector::parse("[id^=art_]").unwrap();
-        for article in element.select(&article_selector) {
-            let article = ArticleParser::parse(article)?;
-            section.push(article)
-        }
-        Ok(section)
-    }
-}
blob - beb3511efeac1aea3dded27c39b515121ce0a6a2
blob + 81f83d067a5a4809cd1127b461abafba3af13bb2
--- eur-lex-scraper-naive/src/parsers/mod.rs
+++ eur-lex-scraper-naive/src/parsers/mod.rs
@@ -4,3 +4,4 @@ pub mod article;
 pub mod enacting_terms_parser;
 pub mod preamble_item_parser;
 pub mod preamble_parser;
+pub mod section;
blob - /dev/null
blob + c5371e6926b80bd6c694beed7b1a850dd88803ee (mode 644)
--- /dev/null
+++ eur-lex-scraper-naive/src/parsers/section.rs
@@ -0,0 +1,446 @@
+use scraper::{ElementRef, Selector};
+use thiserror::Error;
+
+use crate::models::enacting_terms::Section;
+use crate::parsers::article::{ArticleParser, ArticleParserError};
+
+pub struct SectionParser {}
+
+#[derive(Error, Debug, PartialEq, PartialOrd)]
+pub enum SectionParserError {
+    #[error("error while parsing section")]
+    GenericError,
+    #[error("error parsing article")]
+    ArticleError(ArticleParserError),
+}
+
+impl From<ArticleParserError> for SectionParserError {
+    fn from(value: ArticleParserError) -> Self {
+        Self::ArticleError(value)
+    }
+}
+
+impl SectionParser {
+    pub fn parse(element: ElementRef) -> Result<Section, SectionParserError> {
+        let mut section = Section::default();
+        // Select article elements (ids starting with "art_") but skip their title sub-elements (ids containing ".tit").
+        let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap();
+        for article in element.select(&article_selector) {
+            let article = ArticleParser::parse(article)?;
+            section.push(article)
+        }
+        Ok(section)
+    }
+}
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use scraper::{Html, Selector};
+
+    fn get_section_html() -> String {
+        let section_html = r#"
+               <div id="cpt_III.sct_1">
+                  <p id="d1e3012-1-1" class="oj-ti-section-1">
+                     <span class="oj-italic">SECTION 1</span>
+                  </p>
+                  <div class="eli-title" id="cpt_III.sct_1.tit_1">
+                     <p id="L_01689EN.01000101-d-004" class="oj-ti-section-2">
+                        <span class="oj-bold">
+                           <span class="oj-italic">Classification of AI systems as high-risk</span>
+                        </span>
+                     </p>
+                  </div>
+                  <div class="eli-subdivision" id="art_6">
+                     <p id="d1e3022-1-1" class="oj-ti-art">Article 6</p>
+                     <div class="eli-title" id="art_6.tit_1">
+                        <p class="oj-sti-art">Classification rules for high-risk AI systems</p>
+                     </div>
+                     <div id="006.001">
+                        <p class="oj-normal">1.   Irrespective of whether an AI system is placed on the market or put into service independently of the products referred to in points (a) and (b), that AI system shall be considered to be high-risk where both of the following conditions are fulfilled:</p>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(a)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI system is intended to be used as a safety component of a product, or the AI system is itself a product, covered by the Union harmonisation legislation listed in Annex I;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(b)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the product whose safety component pursuant to point (a) is the AI system, or the AI system itself as a product, is required to undergo a third-party conformity assessment, with a view to the placing on the market or the putting into service of that product pursuant to the Union harmonisation legislation listed in Annex I.</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                     </div>
+                     <div id="006.002">
+                        <p class="oj-normal">2.   In addition to the high-risk AI systems referred to in paragraph 1, AI systems referred to in Annex III shall be considered to be high-risk.</p>
+                     </div>
+                     <div id="006.003">
+                        <p class="oj-normal">3.   By derogation from paragraph 2, an AI system referred to in Annex III shall not be considered to be high-risk where it does not pose a significant risk of harm to the health, safety or fundamental rights of natural persons, including by not materially influencing the outcome of decision making.</p>
+                        <p class="oj-normal">The first subparagraph shall apply where any of the following conditions is fulfilled:</p>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(a)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI system is intended to perform a narrow procedural task;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(b)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI system is intended to improve the result of a previously completed human activity;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(c)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI system is intended to detect decision-making patterns or deviations from prior decision-making patterns and is not meant to replace or influence the previously completed human assessment, without proper human review; or</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(d)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI system is intended to perform a preparatory task to an assessment relevant for the purposes of the use cases listed in Annex III.</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <p class="oj-normal">Notwithstanding the first subparagraph, an AI system referred to in Annex III shall always be considered to be high-risk where the AI system performs profiling of natural persons.</p>
+                     </div>
+                     <div id="006.004">
+                        <p class="oj-normal">4.   A provider who considers that an AI system referred to in Annex III is not high-risk shall document its assessment before that system is placed on the market or put into service. Such provider shall be subject to the registration obligation set out in Article 49(2). Upon request of national competent authorities, the provider shall provide the documentation of the assessment.</p>
+                     </div>
+                     <div id="006.005">
+                        <p class="oj-normal">5.   The Commission shall, after consulting the European Artificial Intelligence Board (the ‘Board’), and no later than 2 February 2026, provide guidelines specifying the practical implementation of this Article in line with Article 96 together with a comprehensive list of practical examples of use cases of AI systems that are high-risk and not high-risk.</p>
+                     </div>
+                     <div id="006.006">
+                        <p class="oj-normal">6.   The Commission is empowered to adopt delegated acts in accordance with Article 97 in order to amend paragraph 3, second subparagraph, of this Article by adding new conditions to those laid down therein, or by modifying them, where there is concrete and reliable evidence of the existence of AI systems that fall under the scope of Annex III, but do not pose a significant risk of harm to the health, safety or fundamental rights of natural persons.</p>
+                     </div>
+                     <div id="006.007">
+                        <p class="oj-normal">7.   The Commission shall adopt delegated acts in accordance with Article 97 in order to amend paragraph 3, second subparagraph, of this Article by deleting any of the conditions laid down therein, where there is concrete and reliable evidence that this is necessary to maintain the level of protection of health, safety and fundamental rights provided for by this Regulation.</p>
+                     </div>
+                     <div id="006.008">
+                        <p class="oj-normal">8.   Any amendment to the conditions laid down in paragraph 3, second subparagraph, adopted in accordance with paragraphs 6 and 7 of this Article shall not decrease the overall level of protection of health, safety and fundamental rights provided for by this Regulation and shall ensure consistency with the delegated acts adopted pursuant to Article 7(1), and take account of market and technological developments.</p>
+                     </div>
+                  </div>
+                  <div class="eli-subdivision" id="art_7">
+                     <p id="d1e3119-1-1" class="oj-ti-art">Article 7</p>
+                     <div class="eli-title" id="art_7.tit_1">
+                        <p class="oj-sti-art">Amendments to Annex III</p>
+                     </div>
+                     <div id="007.001">
+                        <p class="oj-normal">1.   The Commission is empowered to adopt delegated acts in accordance with Article 97 to amend Annex III by adding or modifying use-cases of high-risk AI systems where both of the following conditions are fulfilled:</p>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(a)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI systems are intended to be used in any of the areas listed in Annex III;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(b)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the AI systems pose a risk of harm to health and safety, or an adverse impact on fundamental rights, and that risk is equivalent to, or greater than, the risk of harm or of adverse impact posed by the high-risk AI systems already referred to in Annex III.</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                     </div>
+                     <div id="007.002">
+                        <p class="oj-normal">2.   When assessing the condition under paragraph 1, point (b), the Commission shall take into account the following criteria:</p>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(a)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the intended purpose of the AI system;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(b)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which an AI system has been used or is likely to be used;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(c)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the nature and amount of the data processed and used by the AI system, in particular whether special categories of personal data are processed;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(d)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which the AI system acts autonomously and the possibility for a human to override a decision or recommendations that may lead to potential harm;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(e)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which the use of an AI system has already caused harm to health and safety, has had an adverse impact on fundamental rights or has given rise to significant concerns in relation to the likelihood of such harm or adverse impact, as demonstrated, for example, by reports or documented allegations submitted to national competent authorities or by other reports, as appropriate;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(f)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the potential extent of such harm or such adverse impact, in particular in terms of its intensity and its ability to affect multiple persons or to disproportionately affect a particular group of persons;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(g)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which persons who are potentially harmed or suffer an adverse impact are dependent on the outcome produced with an AI system, in particular because for practical or legal reasons it is not reasonably possible to opt-out from that outcome;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(h)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which there is an imbalance of power, or the persons who are potentially harmed or suffer an adverse impact are in a vulnerable position in relation to the deployer of an AI system, in particular due to status, authority, knowledge, economic or social circumstances, or age;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(i)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which the outcome produced involving an AI system is easily corrigible or reversible, taking into account the technical solutions available to correct or reverse it, whereby outcomes having an adverse impact on health, safety or fundamental rights, shall not be considered to be easily corrigible or reversible;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(j)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the magnitude and likelihood of benefit of the deployment of the AI system for individuals, groups, or society at large, including possible improvements in product safety;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(k)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the extent to which existing Union law provides for:</p>
+                                    <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                                       <col width="4%"/>
+                                       <col width="96%"/>
+                                       <tbody>
+                                          <tr>
+                                             <td valign="top"  >
+                                                <p class="oj-normal">(i)</p>
+                                             </td>
+                                             <td valign="top"  >
+                                                <p class="oj-normal">effective measures of redress in relation to the risks posed by an AI system, with the exclusion of claims for damages;</p>
+                                             </td>
+                                          </tr>
+                                       </tbody>
+                                    </table>
+                                    <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                                       <col width="4%"/>
+                                       <col width="96%"/>
+                                       <tbody>
+                                          <tr>
+                                             <td valign="top"  >
+                                                <p class="oj-normal">(ii)</p>
+                                             </td>
+                                             <td valign="top"  >
+                                                <p class="oj-normal">effective measures to prevent or substantially minimise those risks.</p>
+                                             </td>
+                                          </tr>
+                                       </tbody>
+                                    </table>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                     </div>
+                     <div id="007.003">
+                        <p class="oj-normal">3.   The Commission is empowered to adopt delegated acts in accordance with Article 97 to amend the list in Annex III by removing high-risk AI systems where both of the following conditions are fulfilled:</p>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(a)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the high-risk AI system concerned no longer poses any significant risks to fundamental rights, health or safety, taking into account the criteria listed in paragraph 2;</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                        <table width="100%" border="0" cellspacing="0" cellpadding="0">
+                           <col width="4%"/>
+                           <col width="96%"/>
+                           <tbody>
+                              <tr>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">(b)</p>
+                                 </td>
+                                 <td valign="top"  >
+                                    <p class="oj-normal">the deletion does not decrease the overall level of protection of health, safety and fundamental rights under Union law.</p>
+                                 </td>
+                              </tr>
+                           </tbody>
+                        </table>
+                     </div>
+                  </div>
+               </div>
+        "#;
+        section_html.to_string()
+    }
+
+    #[test]
+    fn parsing_article() {
+        let html = Html::parse_fragment(&get_section_html());
+        let selector = Selector::parse(r#"[id*="sct_"]:not([id*=".tit_"])"#).unwrap();
+        let element_ref = html.select(&selector).next().unwrap();
+        let section_left = SectionParser::parse(element_ref).unwrap();
+        assert_eq!(section_left.items.len(), 2);
+        assert_eq!(section_left.items.get(0).unwrap().number, 6);
+        assert_eq!(section_left.items.get(1).unwrap().number, 7);
+
+        /*
+        let article_right = Article {
+            number: 1,
+            text: html2text(&element_ref.inner_html()),
+        };
+        assert_eq!(article_left, article_right)
+        */
+    }
+}