
    ̆h?                       S r SSKJr  SSKrSSKrSSKrSSKJrJ	r	  \(       a  SSK
Jr  \R                  R                  S5      r\R                  R                  \5      r\R"                  R%                  \5        \\R&                  S'   \R(                  " S5      \l        \R(                  " S	5      \l        \R(                  " S
5      \l        \R.                  \l        \R(                  " S\R2                  5      \l        \R(                  " S\R2                  5      \l        \R(                  " S5      r " S S\R:                  5      rg)a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz<[a-zA-Z]|</>z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
a  
  [a-zA-Z][^`\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:=                            # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
z^([ ]*\n){2}c                  F  ^  \ rS rSr% SrSU 4S jjrU 4S jrU 4S jr\SS j5       r	S S jr
S!S jrS"S	 jrS#S
 jrS$S jrS%S jrS#S jrS&S jrS&S jrS$S jrS'U 4S jjrS$S jrS$S jrS$S jrS(U 4S jjrS(U 4S jjrS)S*U 4S jjjrSrS\S'   S+S jrS(S jrSrU =r $ ),HTMLExtractor\   z
Extract raw HTML from text.

The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
[`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
is stored in `cleandoc` as a list of strings.
c                   > SU;  a  SUS'   [        S/5      U l        S/U l        SU l        [        TU ]  " U0 UD6  Xl        g )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cacheoverride_comment_updatesuper__init__md)selfr   argskwargs	__class__s       K/opt/services/DDDS/venv/lib/python3.13/site-packages/markdown/htmlparser.pyr   HTMLExtractor.__init__e   sP    V+).F%& tf+#$#',$ 	$)&)    c                x   > SU l         SU l        / U l        / U l        / U l        S/U l        [        TU ]  5         g)z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   s    r   r"   HTMLExtractor.resett   s9    
 "
!##%#$#r   c                   > [         TU ]  5         [        U R                  5      (       al  U R                  (       a@  U R
                  (       d/  U R                  [        R                  U R                  5      5        OU R                  U R                  5        [        U R                  5      (       a_  U R                  R                  U R                  R                  R                  SR                  U R                  5      5      5        / U l	        gg)zHandle any buffered data. N)r   closelenrawdatar   
cdata_elemhandle_datar   unescaper    r!   appendr   	htmlStashstorejoinr#   s    r   r'   HTMLExtractor.close   s    t|| $$T__  !4!4T\\!BC  .t{{MM  !2!2!8!89M!NODK r   c                n   [        [        U R                  5      S-
  U R                  S-
  5       Hg  nU R                  U   nU R                  R                  SU5      nUS:X  a  [        U R                  5      nU R                  R                  US-   5        Mi     U R                  U R                  S-
     $ )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger(   r   linenor)   findr-   )r   iilast_line_start_poslf_poss       r   line_offsetHTMLExtractor.line_offset   s     D334Q6AFB"&"9"9""=\\&&t-@AF|T\\*##**6!84 G &&t{{1}55r   c                    U R                   S:X  a  gU R                   S:  a  gU R                  U R                  U R                  U R                   -    R                  5       S:H  $ )zn
Returns True if current position is at start of line.

Allows for up to three blank spaces at start of line.
r   T   Fr&   )offsetr)   r<   stripr   s    r   at_line_startHTMLExtractor.at_line_start   sV     ;;!;;?||D,,T-=-=-KLRRTXZZZr   c                    U R                   U R                  -   n[        R                  R	                  U R
                  U5      nU(       a  U R
                  X#R                  5        $ SR                  U5      $ )z
Returns the text of the end tag.

If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
z</{}>)r<   r@   r   	endendtagsearchr)   endformat)r   tagstartms       r   get_endtag_textHTMLExtractor.get_endtag_text   s]       4;;.  ''e<<<eeg.. >>#&&r   c                ^   XR                   ;   a  U R                  X5        g U R                  R                  U5      (       aY  U R                  (       d&  U R                  5       (       a3  U R                  (       d"  SU l        U R                  R                  S5        U R                  5       nU R                  (       a7  U R                  R                  U5        U R                  R                  U5        g U R                  R                  U5        XR                  ;   a  U R                  5         g g )NTr4   )r   handle_startendtagr   is_block_levelr   rC   r   r!   r-   get_starttag_textr   r    CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rJ   attrstexts       r   handle_starttagHTMLExtractor.handle_starttag   s    //!##C/77!!#&&DKKD<N<N<P<PY]YcYcDJMM  &%%'::JJc"KKt$MM  &111%%' 2r   c                j   U R                  U5      nU R                  (       Gau  U R                  R                  U5        XR                  ;   aC  U R                  (       a2  U R                  R                  5       U:X  a  OU R                  (       a  M2  [        U R                  5      S:X  a  [        R                  U R                  U R                  U R                  -   [        U5      -   S  5      (       a  U R                  R                  S5        OSU l        SU l        U R                  R                  U R                  R                  R!                  SR#                  U R                  5      5      5        U R                  R                  S5        / U l        g g U R                  R                  U5        g )Nr   r4   TFr&   

)rM   r   r    r-   r   popr(   blank_line_rematchr)   r<   r@   r   r!   r   r.   r/   r0   )r   rJ   rV   s      r   handle_endtagHTMLExtractor.handle_endtag   s1   ##C(:::KKt$jj jjzz~~'3. jjj 4::!# &&t||D4D4Dt{{4RUXY]U^4^4_'`aaKK&&t, #'DK"
$$TWW%6%6%<%<RWWT[[=Q%RS$$V,  $ MM  &r   c                    U R                   (       a  SU;   a  SU l         U R                  (       a  U R                  R                  U5        g U R                  R                  U5        g )Nr4   F)r   r   r    r-   r!   r   datas     r   r+   HTMLExtractor.handle_data   sA    ;;44<DK::KKt$MM  &r   c                &   U R                   (       d  U R                  (       a  U R                  R                  U5        gU R	                  5       (       Ga"  U(       Ga  [
        R                  U R                  U R                  U R                  -   [        U5      -   S 5      (       a  US-  nOSU l        U R                  (       a  U R                  S   OSnUR                  S5      (       d1  UR                  S5      (       a  U R                  R                  S5        U R                  R                  U R                  R                  R                  U5      5        U R                  R                  S5        gU R                  R                  U5        g)zHandle empty tags (`<data>`). Nr4   Tr5   r&   rZ   )r   r   r    r-   rC   r\   r]   r)   r<   r@   r(   r!   endswithr   r.   r/   )r   rb   is_blockitems       r   handle_empty_tagHTMLExtractor.handle_empty_tag   s   ::KKt$!!h""4<<0@0@4;;0NQTUYQZ0Z0[#\]] #(,4==$2D==((T]]4-@-@$$T*MM  !2!2!8!8!>?MM  (MM  &r   c                r    U R                  U R                  5       U R                  R                  U5      S9  g )Nrf   )rh   rR   r   rQ   )r   rJ   rU   s      r   rP    HTMLExtractor.handle_startendtag  s.    d446AWAWX[A\]r   c                B    U R                  SR                  U5      SS9  g )Nz&#{};Frk   rh   rI   r   names     r   handle_charrefHTMLExtractor.handle_charref	  s    gnnT2UCr   c                B    U R                  SR                  U5      SS9  g )Nz&{};Frk   rn   ro   s     r   handle_entityrefHTMLExtractor.handle_entityref  s    fmmD1EBr   c                    U R                   U R                  -   [        U5      -   S-   nU R                  X"S-    S:w  a  U R	                  S5        SU l        g U R                  SR                  U5      SS9  g )N   r?   z--><Tz	<!--{}-->rk   )r<   r@   r(   r)   r+   r   rh   rI   )r   rb   is      r   handle_commentHTMLExtractor.handle_comment  sp    t{{*SY6:<<a% E)S!+/D(k006Fr   c                Z   > U R                   (       a  SU l         SnSn[        TU ]	  X5      $ )NFr   r3   )r   r   	updatepos)r   ry   jr   s      r   r}   HTMLExtractor.updatepos  s.    ''+0D(AAw &&r   c                B    U R                  SR                  U5      SS9  g )Nz<!{}>Trk   rn   ra   s     r   handle_declHTMLExtractor.handle_decl  s    gnnT2TBr   c                B    U R                  SR                  U5      SS9  g )Nz<?{}?>Trk   rn   ra   s     r   	handle_piHTMLExtractor.handle_pi"  s    hood3dCr   c                v    UR                  S5      (       a  SOSnU R                  SR                  X5      SS9  g )NzCDATA[z]]>z]>z<![{}{}Trk   )
startswithrh   rI   )r   rb   rH   s      r   unknown_declHTMLExtractor.unknown_decl%  s5    x00edi..t9DIr   c                   > U R                  5       (       d  U R                  (       a  [        TU ]  U5      $ U R	                  S5        US-   $ )Nz<?   )rC   r   r   parse_pir+   )r   ry   r   s     r   r   HTMLExtractor.parse_pi)  sA    4;;7#A&& 	1ur   c                j  > U R                  5       (       d  U R                  (       aw  U R                  XS-    S:X  aS  U R                  XS-    S:X  d>  U R                  U5      nUS:X  a%  U R	                  U R                  XS-    5        US-   $ U$ [
        TU ]  U5      $ U R	                  S5        US-   $ )	Nr?   z<![	   z	<![CDATA[r5   r3   z<!r   )rC   r   r)   parse_bogus_commentr+   r   parse_html_declaration)r   ry   resultr   s      r   r   $HTMLExtractor.parse_html_declaration1  s    4;;||Ac"e+DLLQ34G;4V 11!4R<$$T\\!E%:;q5L71!44 	1ur   c                l   > [         TU ]  X5      nUS:X  a  gU R                  U R                  X SS9  U$ )Nr5   Frk   )r   r   rh   r)   )r   ry   reportposr   s       r   r   !HTMLExtractor.parse_bogus_commentA  s?     g)!4"9dll11EB
r   Nz
str | None_HTMLExtractor__starttag_textc                    U R                   $ )z)Return full source of start tag: `<...>`.)r   rB   s    r   rR   HTMLExtractor.get_starttag_textP  s    ###r   c                   U R                   XS-    S:X  a%  U R                  U R                   XS-    5        US-   $ S U l        U R                  U5      nUS:  a%  U R                  U R                   XS-    5        US-   $ U R                   nX1U U l        / n[        R
                  R                  X1S-   5      nU(       d   S5       eUR                  5       nUR                  S5      R                  5       =U l
        nXb:  a  [        R                  R                  X65      nU(       d  OUR                  SSS5      u  pnU
(       d  S nO0US S Ss=:X  a	  USS  :X  d  O  US S S	s=:X  a	  USS  :X  a  O  OUSS nU(       a  [        R                  U5      nUR                  U	R                  5       U45        UR                  5       nXb:  a  M  X6U R                  5       nUS
;  a  U R                  5       u  pSU R                  ;   aO  XR                  R!                  S5      -   n[#        U R                  5      U R                  R%                  S5      -
  nOU[#        U R                  5      -   nU R                  X1U 5        U$ UR'                  S5      (       a  U R)                  Xt5        U$ XpR*                  ;   a  U R-                  U5        U R/                  Xt5        U$ )Nr?   z</>r   r3   z#unexpected call to parse_starttag()r   'r5   ")>/>r4   r   )r)   r+   r   check_for_whole_start_tagr   tagfind_tolerantr]   rH   grouplowerlasttagattrfind_tolerantr,   r-   rA   getposcountr(   rfindre   rP   rS   set_cdata_moderW   )r   ry   endposr)   rU   r]   krJ   rL   attrnamerest	attrvaluerH   r7   r@   s                  r   parse_starttagHTMLExtractor.parse_starttagT  s   <<a% E)T\\!E23q5L#//2A:T\\!E23q5L,,&0 ++11'Q3?;;;uIIK"[[^1133sj,,227>A()1a(8%HI 	2A$8)BC.82A#7237%aO	&//	:	LL(..*I67A j %%'k!![[]NFt+++"6"6"<"<T"BBT112//55d;<  #d&:&:";;Wv./M<<##C/  111##C(  ,r   )__starttag_textr    r!   r   r   r   r   r   r   r   r   )r   r   )returnint)r   bool)rJ   strr   r   )rJ   r   rU   zSequence[tuple[str, str]])rJ   r   )rb   r   )rb   r   rf   r   )rp   r   )ry   r   r~   r   r   r   )ry   r   r   r   )r   )ry   r   r   r   r   r   )r   r   )!__name__
__module____qualname____firstlineno____doc__r   r"   r'   propertyr<   rC   rM   rW   r^   r+   rh   rP   rq   rt   rz   r}   r   r   r   r   r   r   r   __annotations__rR   r   __static_attributes____classcell__)r   s   @r   r	   r	   \   s    	 
6 
6['(*'6''.^DCG'CDJ   #'OZ&$6 6r   r	   )r   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilestarttagopenpiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantlocatetagendr\   
HTMLParserr	    r   r   <module>r      s  ( # 	  
 *!
 ~~.^^,,T2
   
 #&L  **_5
  ZZ'
 zz"?@
  #,,
 (*

 4 ZZ)
 % ** & ZZ
 " 

?+nJ)) nr   