
    Xh/                        d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZ d	 Z G d
 d      Z G d d      Z G d d      Z G d d      Zd"dZd Z G d d      Z G d de      Zd Zd Zd Zd Zd Zd Zd Zd Z d  Z!d! Z"y)#z$This module implements a CYK parser.    )defaultdictN   )
ParseError)Token)Tree)TerminalNonTerminalSymbolc                 X    t        | t              sJ | j                  |j                  k(  S N)
isinstanceTnametype)tss     D/var/www/html/myenv/lib/python3.12/site-packages/lark/parsers/cyk.pymatchr      s%    a66QVV    c                   @     e Zd ZdZ fdZd Zd Zd Zd Zd Z	 xZ
S )RulezContext-free grammar rule.c                     t         t        |           t        |t              sJ |       t        d |D              sJ |       || _        || _        || _        || _	        y )Nc              3   `   K   | ]&  }t        |t              xs t        |t               ( y wr   )r   NTr   .0xs     r   	<genexpr>z Rule.__init__.<locals>.<genexpr>   s&     FQ:a$8
1a(88Fs   ,.)
superr   __init__r   r   alllhsrhsweightalias)selfr"   r#   r$   r%   	__class__s        r   r    zRule.__init__   sZ    dD"$#r"'C'"F#FFKKF
r   c                 v    t        | j                        ddj                  d | j                  D              S )Nz ->  c              3   2   K   | ]  }t        |        y wr   strr   s     r   r   zRule.__str__.<locals>.<genexpr>"   s     4NSV4N   )r,   r"   joinr#   r&   s    r   __str__zRule.__str__!   s(     ]CHH4NTXX4N,NOOr   c                     t        |       S r   r+   r/   s    r   __repr__zRule.__repr__$       4yr   c                 V    t        | j                  t        | j                        f      S r   )hashr"   tupler#   r/   s    r   __hash__zRule.__hash__'   s    TXXuTXX/00r   c                 j    | j                   |j                   k(  xr | j                  |j                  k(  S r   )r"   r#   r&   others     r   __eq__zRule.__eq__*   s'    xx599$>UYY)>>r   c                     | |k(   S r    r9   s     r   __ne__zRule.__ne__-   s    EM""r   )__name__
__module____qualname____doc__r    r0   r2   r7   r;   r>   __classcell__r'   s   @r   r   r      s%    $P1?#r   r   c                   (    e Zd ZdZd Zd Zd Zd Zy)GrammarzContext-free grammar.c                 $    t        |      | _        y r   )	frozensetrules)r&   rI   s     r   r    zGrammar.__init__4   s    u%
r   c                 4    | j                   |j                   k(  S r   )rI   r9   s     r   r;   zGrammar.__eq__7   s    zzU[[((r   c                 d    ddj                  t        d | j                  D                    z   dz   S )N
c              3   2   K   | ]  }t        |        y wr   )reprr   s     r   r   z"Grammar.__str__.<locals>.<genexpr>;   s     &C1tAw&Cr-   )r.   sortedrI   r/   s    r   r0   zGrammar.__str__:   s*    dii&C

&C CDDtKKr   c                     t        |       S r   r+   r/   s    r   r2   zGrammar.__repr__=   r3   r   N)r?   r@   rA   rB   r    r;   r0   r2   r=   r   r   rF   rF   1   s    &)Lr   rF   c                       e Zd ZdZddZd Zy)RuleNodez@A node in the parse tree, which also contains the full rhs rule.c                 .    || _         || _        || _        y r   )rulechildrenr$   )r&   rT   rU   r$   s       r   r    zRuleNode.__init__E   s    	 r   c                     dt        | j                  j                        ddj                  d | j                  D              dS )Nz	RuleNode(z, [z, c              3   2   K   | ]  }t        |        y wr   r+   r   s     r   r   z$RuleNode.__repr__.<locals>.<genexpr>K   s     EdQRc!fEdr-   z]))rN   rT   r"   r.   rU   r/   s    r   r2   zRuleNode.__repr__J   s0    '+DIIMM':DIIEdVZVcVcEd<deer   Nr   )r?   r@   rA   rB   r    r2   r=   r   r   rR   rR   B   s    J
fr   rR   c                   4     e Zd ZdZ fdZd Zd Zd Z xZS )ParserzParser wrapper.c                     t         t        |           |D ci c]  }|| c}| _        |D cg c]  }| j	                  |       }}t        t        |            | _        y c c}w c c}w r   )r   rZ   r    
orig_rules_to_ruleto_cnfrF   grammar)r&   rI   rT   r'   s      r   r    zParser.__init__R   sZ    fd$&278$4:8167t$77gen- 97s
   
A"A'c                     t        |j                  t              sJ t        d |j                  D              sJ t        |j                  |j                  |j                  j                  r|j                  j                  |      S d|      S )z?Converts a lark rule, (lhs, rhs, callback, options), to a Rule.c              3   <   K   | ]  }t        |t                y wr   )r   r
   r   s     r   r   z"Parser._to_rule.<locals>.<genexpr>[   s     FQ:a(F   r   r$   r%   )r   originr   r!   	expansionr   optionspriority)r&   	lark_rules     r   r]   zParser._to_ruleX   s    )**B///F)2E2EFFFFi111:1B1B1K1K9$$-- 	QR 	r   c                    sJ t              t        || j                        \  }}t        fd|dt	        |      dz
  f   D              rt        d      |dt	        |      dz
  f      }| j                  t        |            S )z(Parses input, which is a list of tokens.c              3   <   K   | ]  }|j                   k7    y wr   )r"   )r   rstarts     r   r   zParser.parse.<locals>.<genexpr>h   s     F!quu~Fs   r      zParsing failed.)r   _parser_   r!   lenr   _to_tree
revert_cnf)r&   	tokenizedrl   tabletreesparses     `   r   ru   zParser.parsea   s    u5	i6uFuaY!1C-D'EFF.//q#i.1,-.u5}}Z.//r   c                 |   | j                   |j                  j                     }g }|j                  D ]j  }t	        |t
              r!|j                  | j                  |             4t	        |j                  t              sJ |j                  |j                         l t        |j                  |      }||_        |S )z.Converts a RuleNode parse tree to a lark Tree.)r\   rT   r%   rU   r   rR   appendrp   r   r   r   rd   )r&   	rule_node	orig_rulerU   childr   s         r   rp   zParser._to_treem   s    OOINN$8$89	'' 	,E%*e 45!%**e444

+	, !!8,r   )	r?   r@   rA   rB   r    r]   ru   rp   rC   rD   s   @r   rZ   rZ   O   s    .
0r   rZ   c                    t        | t              rRt        d|dz  z  t        | j                  j
                        z          | j                  D ]  }t        ||dz           y t        d|dz  z  t        | j                        z          y )Nr)   r   rm   )	r   rR   printr,   rT   r"   rU   print_parser   )nodeindentrz   s      r   r}   r}   |   sp    $!cVaZ 3tyy}}#556]] 	+Evz*	+ 	cVaZ 3tvv;./r   c           
         t        t              }t        t              }t        |       D ]  \  }}|j                  j                         D ]  \  }}t        ||      s|D ]  }|||f   j                  |       |j                  |||f   vs,|j                  |||f   |j                     j                  k  sXt        |t        |      g|j                        |||f   |j                  <      t        dt        |       dz         D ]z  }	t        t        |       |	z
  dz         D ]Y  }t        |dz   ||	z         D ]@  }
||
dz
  f}|
||	z   dz
  f}t        j                  ||   ||         D ]  \  }}|j                   j#                  |j                  |j                  fg       D ]  }||||	z   dz
  f   j                  |       ||   |j                     }||   |j                     }|j                  |j                  z   |j                  z   }|j                  ||||	z   dz
  f   vs(|||||	z   dz
  f   |j                     j                  k  st        |||g|      ||||	z   dz
  f   |j                  <     C \ } ||fS )z*Parses sentence 's' using CNF grammar 'g'.r$   r   rm   )r   setdict	enumerateterminal_rulesitemsr   addr"   r$   rR   r   rangero   	itertoolsproductnonterminal_rulesget)r   grs   rt   iwterminalrI   rT   lpspan1span2r1r2r1_treer2_treerule_total_weights                     r   rn   rn      s    E E! ]1 //557 	]OHeXq!! ]D1a&M%%d+q!f5eQFmDHH&=&D&DD2:4!A$PTP[P[2\q!fdhh/	]	]] 1c!fqj! {s1vzA~& 	{A1q5!a%( {AE
AEAI'//eeElK {FB ! 3 3 7 78H" M {q!a%!)n-11$7"',rvv"6"',rvv"6,0KK'..,H7>>,Y) HHE1a!eai.,AA05!QUQY3H3R3Y3YY>FtgW^M_hy>zE1a!eai.1$((;{{{	{{  %<r   c                   .     e Zd ZdZ fdZd Zd Z xZS )
CnfWrapperzkCNF wrapper for grammar.

  Validates that the input grammar is CNF and provides helper data structures.
  c                    t         t        |           || _        |j                  | _        t        t              | _        t        t              | _        | j                  D ]  }t        |j                  t              sJ |       t        |j                        dvrt        d      t        |j                        dk(  rIt        |j                  d   t              r,| j                  |j                  d      j!                  |       t        |j                        dk(  rOt#        d |j                  D              r3| j                  t%        |j                           j!                  |       J |        y )N)rm   r   zCYK doesn't support empty rulesrm   r   r   c              3   <   K   | ]  }t        |t                y wr   )r   r   r   s     r   r   z&CnfWrapper.__init__.<locals>.<genexpr>   s     (JqAr):(Jrb   )r   r   r    r_   rI   r   listr   r   r   r"   r   ro   r#   r   r   rw   r!   r6   )r&   r_   rk   r'   s      r   r    zCnfWrapper.__init__   s   j$(*]]
)$/!,T!2 
	 AaeeR(+!+(155z' !BCC155zQ:aeeAh#:##AEE!H-44Q7QUUqS(JAEE(J%J&&uQUU|4;;A>au
	 r   c                 4    | j                   |j                   k(  S r   )r_   r9   s     r   r;   zCnfWrapper.__eq__   s    ||u}},,r   c                 ,    t        | j                        S r   )rN   r_   r/   s    r   r2   zCnfWrapper.__repr__   s    DLL!!r   )r?   r@   rA   rB   r    r;   r2   rC   rD   s   @r   r   r      s    
 $-"r   r   c                   @     e Zd ZdZ fdZd Zej                  Z xZS )UnitSkipRulez@A rule that records NTs that were skipped during transformation.c                 @    t         t        |   ||||       || _        y r   )r   r   r    skipped_rules)r&   r"   r#   r   r$   r%   r'   s         r   r    zUnitSkipRule.__init__   s     lD*3VUC*r   c                 b    t        |t        |             xr | j                  |j                  k(  S r   )r   r   r   r9   s     r   r;   zUnitSkipRule.__eq__   s)    %d,Z1C1CuGZGZ1ZZr   )	r?   r@   rA   rB   r    r;   r   r7   rC   rD   s   @r   r   r      s    J+[ }}Hr   r   c                 .   g }t        | t              r|| j                  z  }|j                  |       t        |t              r||j                  z  }t        | j                  |j
                  || j                  |j                  z   | j                        S )Nrc   )r   r   r   rw   r"   r#   r$   r%   )	unit_ruletarget_ruler   s      r   build_unit_skipruler      s    M)\*000%+|,222	{&--0B0BB)//[ [r   c                     | j                   D ]<  }t        |j                        dk(  st        |j                  d   t              s:|c S  y)zDReturns a non-terminal unit rule from 'g', or None if there is none.rm   r   N)rI   ro   r#   r   r   )r   rT   s     r   get_any_nt_unit_ruler      s@     txx=A*TXXa[""=K r   c                    | j                   D cg c]
  }||k7  s	| }}| j                   D cg c]!  }|j                  |j                  d   k(  s |# }}||D cg c]  }t        ||       c}z  }t	        |      S c c}w c c}w c c}w )zFRemoves 'rule' from 'g' without changing the language produced by 'g'.r   )rI   r"   r#   r   rF   )r   rT   r   	new_rulesrefsrefs         r   _remove_unit_ruler      s|    GG1qqDy1I1ww7!!%%488A;"6A7D7DAS%dC0AAI9 27As   
A>A>!BBBc           
   #   j  K   t        | j                        dz   dj                  d | j                  D              z   }d|z  dz   }t	        | j                  | j                  d   t        |dz        g| j                  | j                         t        dt        | j                        d	z
        D ]=  }t	        t        ||z        | j                  |   t        ||dz   z        gdd
       ? t	        t        |t        | j                        d	z
  z        | j                  dd dd
       yw)z4Splits a rule whose len(rhs) > 2 into shorter rules.___c              3   2   K   | ]  }t        |        y wr   r+   r   s     r   r   z_split.<locals>.<genexpr>  s     .H!s1v.Hr-   z__SP_%sz_%dr   rm   rc   r   SplitN)
r,   r"   r.   r#   r   r   r$   r%   r   ro   )rT   rule_str	rule_namer   s       r   _splitr      s    488}t#chh.Htxx.H&HHHX&.I
txx$((1+r)a-'89$++UYU_U_
``1c$((ma'( g2i!m$txx{ByAE7J4K&LUV^effg
r)s488}q012DHHRSM!SZ
[[s   D1D3c                    | j                   D ch c]&  }|j                  D ]  }t        |t              s| ( }}}|D ci c](  }|t	        t        dt        |      z        |gdd      * }}g }| j                   D ]  t        j                        dkD  rt        d j                  D              rj                  D cg c]#  }t        |t              r||   j                  n|% }}|j                  t	        j                  |j                  j                               |j                  fd|j                         D               |j                          t        |      S c c}}w c c}w c c}w )z/Applies the TERM rule on 'g' (see top comment).z__T_%sr   Termrc   rm   c              3   <   K   | ]  }t        |t                y wr   )r   r   r   s     r   r   z_term.<locals>.<genexpr>  s     $H!Z1%5$Hrb   c              3   F   K   | ]  \  }}|j                   v s|  y wr   )r#   )r   kvrT   s      r   r   z_term.<locals>.<genexpr>  s     L41aa488mQLs   !!)rI   r#   r   r   r   r   r,   ro   anyr"   rw   r$   r%   extendr   rF   )r   rT   r   all_tr   t_rulesr   new_rhss    `      r   _termr   	  s5   77I4DHHIq
1a8HQIQIEITYZqq$r(SV+,qc!6JJZGZI #txx=1$Htxx$H!HJN((SQAq)9wqz~~q@SGST$((GDKKtzzZ[L7==?LLT"# 9 JZ Ts   $E2E2-E8(E=c                     g }| j                   D ]:  }t        |j                        dkD  r|t        |      z  }*|j	                  |       < t        |      S )z.Applies the BIN rule to 'g' (see top comment).r   )rI   ro   r#   r   rw   rF   )r   r   rT   s      r   _binr     sS    I #txx=1%IT"	#
 9r   c                 T    t        |       }|rt        | |      } t        |       }|r| S )z/Applies the UNIT rule to 'g' (see top comment).)r   r   )r   nt_unit_rules     r   _unitr   #  s1    '*L
a.+A.  Hr   c                 R    t        t        t        |                   } t        |       S )z>Creates a CNF grammar from a general context-free grammar 'g'.)r   r   r   r   )r   s    r   r^   r^   ,  s    d58nAa=r   c                 $   |st        t        | |||      ||      S ||d   j                  z
  }t        t        | |d   j                  g||      t	        |d   j                  ||dd  ||d   j                  |d   j
                        g|      S )Nrc   r   r   rm   )rR   r   r$   r"   unroll_unit_skipruler%   )r"   orig_rhsr   rU   r$   r%   s         r   r   r   2  s    S(6GZ`aa-*111}Q'++,V5I$]1%5%9%98 -ab 18 -a 0 7 7q9I9O9OQL  	r   c                    t        | t              r| S | j                  j                  j                  j                  d      r| j                  d   S g }t        t        | j                        D ]b  }t        |t              r?|j                  j                  j                  j                  d      r||j                  z  }R|j                  |       d t        | j                  t              rtt        | j                  j                  | j                  j                  | j                  j                  || j                  j                  | j                  j                         S t        | j                  |      S )zDReverts a parse tree (RuleNode) to its original non-CNF form (Node).__T_r   __SP_)r   r   rT   r"   r   
startswithrU   maprq   rR   rw   r   r   r#   r   r$   r%   )r~   rU   rz   s      r   rq   rq   ?  s   $yy}}$$V,}}QT]]3 	'E%*uzz~~/B/B/M/Mg/VENN*&	' dii.'		tyy}}$(II$;$;X$(II$4$4diiooG G DIIx00r   rX   )#rB   collectionsr   r   
exceptionsr   lexerr   treer   r_   r   r   r	   r   r
   r   r   rF   rR   rZ   r}   rn   r   r   r   r   r   r   r   r   r   r^   r   rq   r=   r   r   <module>r      s    * $  #   > >
# #8 "	f 	f* *Z0"f" ">
4 
[\
1r   