Sindbad~EG File Manager

Current Path : /usr/lib/python3.6/site-packages/html5lib/__pycache__/
Upload File :
Current File : //usr/lib/python3.6/site-packages/html5lib/__pycache__/_tokenizer.cpython-36.opt-1.pyc

3

B;�W+�@s�ddlmZmZmZddlmZddlmZddl	m
Z
ddl	mZddl	mZm
Z
ddl	mZmZmZdd	l	mZmZdd
l	mZddlmZddlmZee�ZGd
d�de�ZdS)�)�absolute_import�division�unicode_literals)�unichr)�deque�)�spaceCharacters)�entities)�asciiLetters�asciiUpper2Lower)�digits�	hexDigits�EOF)�
tokenTypes�
tagTokenTypes)�replacementCharacters)�HTMLInputStream)�TriecsdeZdZdZd��fdd�	Zdd�Zdd�Zd�d
d�Zdd
�Zdd�Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zdd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd8d9�Zd:d;�Zd<d=�Z d>d?�Z!d@dA�Z"dBdC�Z#dDdE�Z$dFdG�Z%dHdI�Z&dJdK�Z'dLdM�Z(dNdO�Z)dPdQ�Z*dRdS�Z+dTdU�Z,dVdW�Z-dXdY�Z.dZd[�Z/d\d]�Z0d^d_�Z1d`da�Z2dbdc�Z3ddde�Z4dfdg�Z5dhdi�Z6djdk�Z7dldm�Z8dndo�Z9dpdq�Z:drds�Z;dtdu�Z<dvdw�Z=dxdy�Z>dzd{�Z?d|d}�Z@d~d�ZAd�d��ZBd�d��ZCd�d��ZDd�d��ZEd�d��ZFd�d��ZGd�d��ZHd�d��ZId�d��ZJd�d��ZKd�d��ZL�ZMS)��
HTMLTokenizera	 This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    NcsFt|f|�|_||_d|_g|_|j|_d|_d|_t	t
|�j�dS)NF)r�stream�parserZ
escapeFlagZ
lastFourChars�	dataState�state�escape�currentToken�superr�__init__)�selfrr�kwargs)�	__class__�� /usr/lib/python3.6/_tokenizer.pyr"szHTMLTokenizer.__init__ccs\tg�|_xL|j�rVx&|jjr:td|jjjd�d�VqWx|jrR|jj�Vq>WqWdS)z� This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        �
ParseErrorr)�type�dataN)r�
tokenQueuerr�errorsr�pop�popleft)rr r r!�__iter__1s


zHTMLTokenizer.__iter__c	%Cs(t}d}|rt}d}g}|jj�}x(||krJ|tk	rJ|j|�|jj�}q$Wtdj|�|�}|tkr�t|}|j	jt
ddd|id���nld|ko�d	kns�|d
kr�d}|j	jt
ddd|id���n(d|ko�d
kn�s�d|ko�dkn�s�d|k�odkn�s�d|k�o4dkn�s�|tddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d
g#�k�r�|j	jt
ddd|id��yt|�}Wn>t
k
�r�|d6}td|d?B�td7|d8@B�}YnX|d9k�r$|j	jt
dd:d;��|jj|�|S)<z�This function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        �
��r"z$illegal-codepoint-for-numeric-entity�	charAsInt)r#r$�datavarsi�i��i��u�r�����i�i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��	i��	i��
i��
i��i��i��i��i��
i��
i��i��i��i��i��ii�i��;z numeric-entity-without-semicolon)r#r$)rr
r�charr�append�int�joinrr%r�	frozenset�chr�
ValueError�unget)	rZisHexZallowed�radix�	charStack�cr-r6�vr r r!�consumeNumberEntityAs`

&

z!HTMLTokenizer.consumeNumberEntityFc	
Cs�d}|jj�g}|dtksB|dtddfksB|dk	rV||dkrV|jj|d��n"|ddk�rd}|j|jj��|ddkr�d	}|j|jj��|r�|dtks�|r�|dtkr�|jj|d�|j|�}n4|j	jt
d
dd��|jj|j��dd
j|�}�njx8|dtk	�rFt
jd
j|���s2P|j|jj���qWy$t
jd
j|dd���}t|�}Wntk
�r�d}YnX|dk	�rD|ddk�r�|j	jt
d
dd��|ddk�r|�r||tk�s�||tk�s�||dk�r|jj|j��dd
j|�}n.t|}|jj|j��|d
j||d��7}n4|j	jt
d
dd��|jj|j��dd
j|�}|�r�|jddd|7<n*|tk�r�d}nd}|j	jt
||d��dS)N�&r�<�#Fr�x�XTr"zexpected-numeric-entity)r#r$r,r5znamed-entity-without-semicolon�=zexpected-named-entityr$�SpaceCharacters�
Characters���)rFrGrKrKrKrKrKrKrKrK)rr6rrr=r7r
rrBr%rr'r9�entitiesTrieZhas_keys_with_prefixZlongest_prefix�len�KeyErrorr
r	r)	r�allowedChar�
fromAttribute�outputr?�hexZ
entityNameZentityLengthZ	tokenTyper r r!�
consumeEntity�sf





zHTMLTokenizer.consumeEntitycCs|j|dd�dS)zIThis method replaces the need for "entityInAttributeValueState".
        T)rOrPN)rS)rrOr r r!�processEntityInAttribute�sz&HTMLTokenizer.processEntityInAttributecCs�|j}|dtkrp|djt�|d<|dtdkrp|drR|jjtddd��|drp|jjtdd	d��|jj|�|j|_d
S)z�This method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r#�name�EndTagr$r"zattributes-in-end-tag)r#r$�selfClosingzself-closing-flag-on-end-tagN)	rr�	translaterrr%r7rr)r�tokenr r r!�emitCurrentToken�s

zHTMLTokenizer.emitCurrentTokencCs�|jj�}|dkr|j|_n�|dkr.|j|_n�|dkrd|jjtddd��|jjtddd��n`|tkrpdS|t	kr�|jjtd	||jj
t	d
�d��n&|jj
d�}|jjtd||d��d
S)NrCrD�r"zinvalid-codepoint)r#r$rJFrIT)rCrDr[)rr6�entityDataStater�tagOpenStater%r7rrr�
charsUntil)rr$�charsr r r!r�s&



zHTMLTokenizer.dataStatecCs|j�|j|_dS)NT)rSrr)rr r r!r\szHTMLTokenizer.entityDataStatecCs�|jj�}|dkr|j|_n�|dkr.|j|_n�|tkr:dS|dkrp|jjtddd��|jjtdd	d��nT|t	kr�|jjtd
||jj
t	d�d��n&|jj
d�}|jjtd||d��dS)
NrCrDFr[r"zinvalid-codepoint)r#r$rJu�rIT)rCrDr[)rr6�characterReferenceInRcdatar�rcdataLessThanSignStaterr%r7rrr^)rr$r_r r r!�rcdataStates&



zHTMLTokenizer.rcdataStatecCs|j�|j|_dS)NT)rSrbr)rr r r!r`1sz(HTMLTokenizer.characterReferenceInRcdatacCs�|jj�}|dkr|j|_nh|dkrR|jjtddd��|jjtddd��n2|tkr^dS|jjd
�}|jjtd||d��d	S)NrDr[r"zinvalid-codepoint)r#r$rJu�FT)rDr[)	rr6�rawtextLessThanSignStaterr%r7rrr^)rr$r_r r r!�rawtextState6s


zHTMLTokenizer.rawtextStatecCs�|jj�}|dkr|j|_nh|dkrR|jjtddd��|jjtddd��n2|tkr^dS|jjd
�}|jjtd||d��d	S)NrDr[r"zinvalid-codepoint)r#r$rJu�FT)rDr[)	rr6�scriptDataLessThanSignStaterr%r7rrr^)rr$r_r r r!�scriptDataStateHs


zHTMLTokenizer.scriptDataStatecCsr|jj�}|tkrdS|dkrL|jjtddd��|jjtddd��n"|jjtd||jjd�d��dS)	NFr[r"zinvalid-codepoint)r#r$rJu�T)rr6rr%r7rr^)rr$r r r!�plaintextStateZs

zHTMLTokenizer.plaintextStatecCs|jj�}|dkr|j|_n�|dkr.|j|_n�|tkrVtd|gddd�|_|j|_n�|dkr�|j	j
tddd	��|j	j
td
dd	��|j|_nt|dkr�|j	j
tdd
d	��|jj|�|j
|_n@|j	j
tddd	��|j	j
td
dd	��|jj|�|j|_dS)N�!�/ZStartTagF)r#rUr$rWZselfClosingAcknowledged�>r"z'expected-tag-name-but-got-right-bracket)r#r$rJz<>�?z'expected-tag-name-but-got-question-markzexpected-tag-namerDT)rr6�markupDeclarationOpenStater�closeTagOpenStater
rr�tagNameStater%r7rr=�bogusCommentState)rr$r r r!r]is6









zHTMLTokenizer.tagOpenStatecCs�|jj�}|tkr0td|gdd�|_|j|_n�|dkrX|jjtddd��|j	|_nn|t
kr�|jjtddd��|jjtd	d
d��|j	|_n0|jjtddd|id
��|jj|�|j|_dS)NrVF)r#rUr$rWrjr"z*expected-closing-tag-but-got-right-bracket)r#r$z expected-closing-tag-but-got-eofrJz</z!expected-closing-tag-but-got-charr$)r#r$r.T)
rr6r
rrrnrr%r7rrr=ro)rr$r r r!rm�s(





zHTMLTokenizer.closeTagOpenStatecCs�|jj�}|tkr|j|_n�|dkr.|j�n~|tkrV|jjt	ddd��|j
|_nV|dkrh|j|_nD|dkr�|jjt	ddd��|jdd	7<n|jd|7<d
S)Nrjr"zeof-in-tag-name)r#r$rir[zinvalid-codepointrUu�T)
rr6r�beforeAttributeNameStaterrZrr%r7rr�selfClosingStartTagStater)rr$r r r!rn�s"






zHTMLTokenizer.tagNameStatecCsP|jj�}|dkr"d|_|j|_n*|jjtddd��|jj|�|j	|_dS)Nrir,rJrD)r#r$T)
rr6�temporaryBuffer�rcdataEndTagOpenStaterr%r7rr=rb)rr$r r r!ra�s

z%HTMLTokenizer.rcdataLessThanSignStatecCsX|jj�}|tkr*|j|7_|j|_n*|jjtddd��|jj	|�|j
|_dS)NrJz</)r#r$T)rr6r
rr�rcdataEndTagNameStaterr%r7rr=rb)rr$r r r!rs�s

z#HTMLTokenizer.rcdataEndTagOpenStatecCs|jo|jdj�|jj�k}|jj�}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|j
�|j|_nH|tkr�|j|7_n0|j
jtdd|jd	��|jj|�|j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)r�lowerrrrr6rrrprrqrZrr
r%r7r=rb)r�appropriater$r r r!rt�s2



z#HTMLTokenizer.rcdataEndTagNameStatecCsP|jj�}|dkr"d|_|j|_n*|jjtddd��|jj|�|j	|_dS)Nrir,rJrD)r#r$T)
rr6rr�rawtextEndTagOpenStaterr%r7rr=rd)rr$r r r!rc�s

z&HTMLTokenizer.rawtextLessThanSignStatecCsX|jj�}|tkr*|j|7_|j|_n*|jjtddd��|jj	|�|j
|_dS)NrJz</)r#r$T)rr6r
rr�rawtextEndTagNameStaterr%r7rr=rd)rr$r r r!rw�s

z$HTMLTokenizer.rawtextEndTagOpenStatecCs|jo|jdj�|jj�k}|jj�}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|j
�|j|_nH|tkr�|j|7_n0|j
jtdd|jd	��|jj|�|j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=rd)rrvr$r r r!rxs2



z$HTMLTokenizer.rawtextEndTagNameStatecCsx|jj�}|dkr"d|_|j|_nR|dkrJ|jjtddd��|j|_n*|jjtddd��|jj	|�|j
|_dS)	Nrir,rhrJz<!)r#r$rDT)rr6rr�scriptDataEndTagOpenStaterr%r7r�scriptDataEscapeStartStater=rf)rr$r r r!res


z)HTMLTokenizer.scriptDataLessThanSignStatecCsX|jj�}|tkr*|j|7_|j|_n*|jjtddd��|jj	|�|j
|_dS)NrJz</)r#r$T)rr6r
rr�scriptDataEndTagNameStaterr%r7rr=rf)rr$r r r!ry,s

z'HTMLTokenizer.scriptDataEndTagOpenStatecCs|jo|jdj�|jj�k}|jj�}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|j
�|j|_nH|tkr�|j|7_n0|j
jtdd|jd	��|jj|�|j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=rf)rrvr$r r r!r{7s2



z'HTMLTokenizer.scriptDataEndTagNameStatecCsJ|jj�}|dkr2|jjtddd��|j|_n|jj|�|j|_dS)N�-rJ)r#r$T)	rr6r%r7r�scriptDataEscapeStartDashStaterr=rf)rr$r r r!rzSs

z(HTMLTokenizer.scriptDataEscapeStartStatecCsJ|jj�}|dkr2|jjtddd��|j|_n|jj|�|j|_dS)Nr|rJ)r#r$T)	rr6r%r7r�scriptDataEscapedDashDashStaterr=rf)rr$r r r!r}]s

z,HTMLTokenizer.scriptDataEscapeStartDashStatecCs�|jj�}|dkr2|jjtddd��|j|_n�|dkrD|j|_nn|dkrz|jjtddd��|jjtddd��n8|tkr�|j	|_n&|jj
d
�}|jjtd||d��d	S)Nr|rJ)r#r$rDr[r"zinvalid-codepointu�T)rDr|r[)rr6r%r7r�scriptDataEscapedDashStater�"scriptDataEscapedLessThanSignStaterrr^)rr$r_r r r!�scriptDataEscapedStategs"




z$HTMLTokenizer.scriptDataEscapedStatecCs�|jj�}|dkr2|jjtddd��|j|_n�|dkrD|j|_nn|dkr�|jjtddd��|jjtddd��|j|_n0|t	kr�|j
|_n|jjtd|d��|j|_d	S)
Nr|rJ)r#r$rDr[r"zinvalid-codepointu�T)rr6r%r7rr~rr�r�rr)rr$r r r!r{s"






z(HTMLTokenizer.scriptDataEscapedDashStatecCs�|jj�}|dkr*|jjtddd��n�|dkr<|j|_n�|dkrd|jjtddd��|j|_nn|dkr�|jjtddd��|jjtdd	d��|j|_n0|t	kr�|j
|_n|jjtd|d��|j|_d
S)Nr|rJ)r#r$rDrjr[r"zinvalid-codepointu�T)rr6r%r7rr�rrfr�rr)rr$r r r!r~�s&






z,HTMLTokenizer.scriptDataEscapedDashDashStatecCs�|jj�}|dkr"d|_|j|_n\|tkrT|jjtdd|d��||_|j	|_n*|jjtddd��|jj
|�|j|_dS)Nrir,rJrD)r#r$T)rr6rr� scriptDataEscapedEndTagOpenStaterr
r%r7r� scriptDataDoubleEscapeStartStater=r�)rr$r r r!r��s


z0HTMLTokenizer.scriptDataEscapedLessThanSignStatecCsP|jj�}|tkr"||_|j|_n*|jjtddd��|jj	|�|j
|_dS)NrJz</)r#r$T)rr6r
rr� scriptDataEscapedEndTagNameStaterr%r7rr=r�)rr$r r r!r��s

z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatecCs|jo|jdj�|jj�k}|jj�}|tkrT|rTtd|jgdd�|_|j|_n�|dkr�|r�td|jgdd�|_|j	|_n||dkr�|r�td|jgdd�|_|j
�|j|_nH|tkr�|j|7_n0|j
jtdd|jd	��|jj|�|j|_d
S)NrUrVF)r#rUr$rWrirjrJz</)r#r$T)rrurrrr6rrrprrqrZrr
r%r7r=r�)rrvr$r r r!r��s2



z.HTMLTokenizer.scriptDataEscapedEndTagNameStatecCs�|jj�}|ttd�BkrR|jjtd|d��|jj�dkrH|j	|_
q�|j|_
nB|tkr�|jjtd|d��|j|7_n|jj
|�|j|_
dS)NrirjrJ)r#r$�scriptT)rirj)rr6rr:r%r7rrrru�scriptDataDoubleEscapedStaterr�r
r=)rr$r r r!r��s


z.HTMLTokenizer.scriptDataDoubleEscapeStartStatecCs�|jj�}|dkr2|jjtddd��|j|_n�|dkrZ|jjtddd��|j|_nt|dkr�|jjtddd��|jjtddd��n>|tkr�|jjtdd	d��|j	|_n|jjtd|d��d
S)Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)
rr6r%r7r� scriptDataDoubleEscapedDashStater�(scriptDataDoubleEscapedLessThanSignStaterr)rr$r r r!r��s$





z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs�|jj�}|dkr2|jjtddd��|j|_n�|dkrZ|jjtddd��|j|_n�|dkr�|jjtddd��|jjtddd��|j|_nF|t	kr�|jjtdd	d��|j
|_n|jjtd|d��|j|_d
S)Nr|rJ)r#r$rDr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)rr6r%r7r�$scriptDataDoubleEscapedDashDashStaterr�r�rr)rr$r r r!r�s(







z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|jj�}|dkr*|jjtddd��n�|dkrR|jjtddd��|j|_n�|dkrz|jjtddd��|j|_n�|dkr�|jjtddd��|jjtdd	d��|j|_nF|t	kr�|jjtdd
d��|j
|_n|jjtd|d��|j|_dS)Nr|rJ)r#r$rDrjr[r"zinvalid-codepointu�zeof-in-script-in-scriptT)rr6r%r7rr�rrfr�rr)rr$r r r!r�s,







z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|jj�}|dkr8|jjtddd��d|_|j|_n|jj|�|j	|_dS)NrirJ)r#r$r,T)
rr6r%r7rrr�scriptDataDoubleEscapeEndStaterr=r�)rr$r r r!r�0s

z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs�|jj�}|ttd�BkrR|jjtd|d��|jj�dkrH|j	|_
q�|j|_
nB|tkr�|jjtd|d��|j|7_n|jj
|�|j|_
dS)NrirjrJ)r#r$r�T)rirj)rr6rr:r%r7rrrrur�rr�r
r=)rr$r r r!r�;s


z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|jj�}|tkr$|jjtd��n|tkrJ|jdj|dg�|j|_n�|dkr\|j	�n�|dkrn|j
|_n�|dkr�|jjtd
dd��|jdj|dg�|j|_n�|d
kr�|jjtd
dd��|jdjddg�|j|_nF|t
k�r|jjtd
dd��|j|_n|jdj|dg�|j|_dS)NTr$r,rjri�'�"rHrDr"z#invalid-character-in-attribute-name)r#r$r[zinvalid-codepointu�z#expected-attribute-name-but-got-eof)r�r�rHrD)rr6rr^r
rr7�attributeNameStaterrZrqr%rrr)rr$r r r!rpKs6










z&HTMLTokenizer.beforeAttributeNameStatecCs�|jj�}d}d}|dkr&|j|_�n0|tkr^|jddd||jjtd�7<d}�n�|dkrld}n�|tkr~|j|_n�|dkr�|j	|_n�|d	kr�|j
jtd
dd��|jdddd
7<d}n�|dk�r|j
jtd
dd��|jddd|7<d}nH|t
k�r8|j
jtd
dd��|j|_n|jddd|7<d}|�r�|jdddjt�|jddd<xP|jddd�D]:\}}|jddd|k�r�|j
jtd
dd��P�q�W|�r�|j�dS)NTFrHr$rrrjrir[r"zinvalid-codepoint)r#r$u�r�r�rDz#invalid-character-in-attribute-namezeof-in-attribute-namezduplicate-attributerKrK)r�r�rDrKrKrKrKrKrK)rr6�beforeAttributeValueStaterr
rr^r�afterAttributeNameStaterqr%r7rrrrXrrZ)rr$ZleavingThisStateZ	emitTokenrU�_r r r!r�isR








&
z HTMLTokenizer.attributeNameStatecCsF|jj�}|tkr$|jjtd��n|dkr8|j|_�n
|dkrJ|j�n�|tkrp|jdj	|dg�|j
|_n�|dkr�|j|_n�|dkr�|jj	t
dd	d
��|jdj	ddg�|j
|_n�|dk�r�|jj	t
ddd
��|jdj	|dg�|j
|_nF|tk�r&|jj	t
ddd
��|j|_n|jdj	|dg�|j
|_dS)NTrHrjr$r,rir[r"zinvalid-codepoint)r#r$u�r�r�rDz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r�r�rD)rr6rr^r�rrZr
rr7r�rqr%rrr)rr$r r r!r��s:











z%HTMLTokenizer.afterAttributeNameStatecCsj|jj�}|tkr$|jjtd��nB|dkr8|j|_�n.|dkrX|j|_|jj|��n|dkrl|j|_�n�|dkr�|j	j
tddd��|j�n�|d	kr�|j	j
tdd
d��|j
dddd
7<|j|_n�|dk�r|j	j
tddd��|j
ddd|7<|j|_nL|tk�rD|j	j
tddd��|j|_n"|j
ddd|7<|j|_dS)NTr�rCr�rjr"z.expected-attribute-value-but-got-right-bracket)r#r$r[zinvalid-codepointr$ru�rHrD�`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eofrK)rHrDr�rKrK)rr6rr^�attributeValueDoubleQuotedStater�attributeValueUnQuotedStater=�attributeValueSingleQuotedStater%r7rrZrrr)rr$r r r!r��s>










z'HTMLTokenizer.beforeAttributeValueStatecCs�|jj�}|dkr|j|_n�|dkr0|jd�n�|dkrj|jjtddd��|jdddd	7<nN|t	kr�|jjtdd
d��|j
|_n&|jdd
d||jjd�7<dS)Nr�rCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-double-quoteTrKrK)r�rCr[)rr6�afterAttributeValueStaterrTr%r7rrrrr^)rr$r r r!r��s 




z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs�|jj�}|dkr|j|_n�|dkr0|jd�n�|dkrj|jjtddd��|jdddd	7<nN|t	kr�|jjtdd
d��|j
|_n&|jdd
d||jjd�7<dS)Nr�rCr[r"zinvalid-codepoint)r#r$r$ru�z#eof-in-attribute-value-single-quoteTrKrK)r�rCr[)rr6r�rrTr%r7rrrrr^)rr$r r r!r��s 




z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|jj�}|tkr|j|_�n�|dkr2|jd�n�|dkrD|j�n�|dkr~|jjt	dd	d
��|j
ddd|7<n�|d
kr�|jjt	ddd
��|j
dddd7<nV|tkr�|jjt	ddd
��|j|_n.|j
ddd||jj
td�tB�7<dS)NrCrjr�r�rHrDr�r"z0unexpected-character-in-unquoted-attribute-value)r#r$r$rr[zinvalid-codepointu�z eof-in-attribute-value-no-quotesT)r�r�rHrDr�rKrKrK)rCrjr�r�rHrDr�r[)rr6rrprrTrZr%r7rrrrr^r:)rr$r r r!r�s,





z)HTMLTokenizer.attributeValueUnQuotedStatecCs�|jj�}|tkr|j|_n�|dkr.|j�np|dkr@|j|_n^|tkrt|jj	t
ddd��|jj|�|j|_n*|jj	t
ddd��|jj|�|j|_dS)Nrjrir"z$unexpected-EOF-after-attribute-value)r#r$z*unexpected-character-after-attribute-valueT)
rr6rrprrZrqrr%r7rr=r)rr$r r r!r� s"






z&HTMLTokenizer.afterAttributeValueStatecCs�|jj�}|dkr&d|jd<|j�n^|tkrZ|jjtddd��|jj|�|j	|_
n*|jjtddd��|jj|�|j|_
dS)NrjTrWr"z#unexpected-EOF-after-solidus-in-tag)r#r$z)unexpected-character-after-solidus-in-tag)rr6rrZrr%r7rr=rrrp)rr$r r r!rq4s





z&HTMLTokenizer.selfClosingStartTagStatecCsD|jjd�}|jdd�}|jjtd|d��|jj�|j|_dS)Nrjr[u��Comment)r#r$T)	rr^�replacer%r7rr6rr)rr$r r r!roFs
zHTMLTokenizer.bogusCommentStatecCs�|jj�g}|ddkrT|j|jj��|ddkrPtddd�|_|j|_dS�n�|ddkr�d}x.d&D]&}|j|jj��|d'|krjd}PqjW|r�tdddddd�|_|j|_dSn�|d(dk�rH|jdk	�rH|jj	j
�rH|jj	j
d)j|jj	jk�rHd}x2d*D]*}|j|jj��|d+|k�rd}P�qW|�rH|j
|_dS|jjtddd��x|�rz|jj|j���q`W|j|_dS),Nrr|r�r,)r#r$T�d�D�o�Or@�C�t�T�y�Y�p�P�e�EFZDoctype)r#rU�publicId�systemId�correct�[�Ar"zexpected-dashes-or-doctyperKrKrK)r�r��r�r��r@r��r�r��r�r��r�r��r�r�)r�r�r�r�r�r�rKrKrK)r�r�r�r�r�r�rK)rr6r7rr�commentStartStater�doctypeStaterZtreeZopenElements�	namespaceZdefaultNamespace�cdataSectionStater%r=r'ro)rr?�matched�expectedr r r!rlUsR


z(HTMLTokenizer.markupDeclarationOpenStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<n�|dkr�|jjtdd	d��|jj|j�|j|_nP|t	kr�|jjtdd
d��|jj|j�|j|_n|jd|7<|j
|_dS)Nr|r[r"zinvalid-codepoint)r#r$r$u�rjzincorrect-commentzeof-in-commentT)rr6�commentStartDashStaterr%r7rrrr�commentState)rr$r r r!r��s(






zHTMLTokenizer.commentStartStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<n�|dkr�|jjtdd	d��|jj|j�|j|_nT|t	kr�|jjtdd
d��|jj|j�|j|_n|jdd|7<|j
|_dS)Nr|r[r"zinvalid-codepoint)r#r$r$u-�rjzincorrect-commentzeof-in-commentT)rr6�commentEndStaterr%r7rrrrr�)rr$r r r!r��s(






z#HTMLTokenizer.commentStartDashStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<nT|tkr�|jjtddd��|jj|j�|j	|_n|jd||jj
d
�7<d	S)Nr|r[r"zinvalid-codepoint)r#r$r$u�zeof-in-commentT)r|r[)rr6�commentEndDashStaterr%r7rrrrr^)rr$r r r!r��s




zHTMLTokenizer.commentStatecCs�|jj�}|dkr|j|_n�|dkrV|jjtddd��|jdd7<|j|_nT|t	kr�|jjtddd��|jj|j�|j
|_n|jdd|7<|j|_d	S)
Nr|r[r"zinvalid-codepoint)r#r$r$u-�zeof-in-comment-end-dashT)rr6r�rr%r7rrr�rr)rr$r r r!r��s 





z!HTMLTokenizer.commentEndDashStatecCs,|jj�}|dkr*|jj|j�|j|_n�|dkrd|jjtddd��|jdd7<|j|_n�|dkr�|jjtdd	d��|j	|_n�|d
kr�|jjtddd��|jd|7<nj|t
kr�|jjtddd��|jj|j�|j|_n4|jjtdd
d��|jdd|7<|j|_dS)Nrjr[r"zinvalid-codepoint)r#r$r$u--�rhz,unexpected-bang-after-double-dash-in-commentr|z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)rr6r%r7rrrrr��commentEndBangStater)rr$r r r!r��s6









zHTMLTokenizer.commentEndStatecCs�|jj�}|dkr*|jj|j�|j|_n�|dkrN|jdd7<|j|_n�|dkr�|jjtddd��|jdd	7<|j	|_nT|t
kr�|jjtdd
d��|jj|j�|j|_n|jdd|7<|j	|_dS)Nrjr|r$z--!r[r"zinvalid-codepoint)r#r$u--!�zeof-in-comment-end-bang-stateT)rr6r%r7rrrr�rr�r)rr$r r r!r��s(






z!HTMLTokenizer.commentEndBangStatecCs�|jj�}|tkr|j|_nj|tkr\|jjtddd��d|j	d<|jj|j	�|j
|_n*|jjtddd��|jj|�|j|_dS)Nr"z!expected-doctype-name-but-got-eof)r#r$Fr�zneed-space-after-doctypeT)rr6r�beforeDoctypeNameStaterrr%r7rrrr=)rr$r r r!r�s





zHTMLTokenizer.doctypeStatecCs�|jj�}|tkrn�|dkrT|jjtddd��d|jd<|jj|j�|j|_n�|dkr�|jjtddd��d	|jd
<|j	|_nR|t
kr�|jjtddd��d|jd<|jj|j�|j|_n||jd
<|j	|_dS)
Nrjr"z+expected-doctype-name-but-got-right-bracket)r#r$Fr�r[zinvalid-codepointu�rUz!expected-doctype-name-but-got-eofT)rr6rr%r7rrrr�doctypeNameStater)rr$r r r!r�s.










z$HTMLTokenizer.beforeDoctypeNameStatecCs|jj�}|tkr2|jdjt�|jd<|j|_n�|dkrh|jdjt�|jd<|jj	|j�|j
|_n�|dkr�|jj	tddd��|jdd7<|j|_nh|t
kr�|jj	tddd��d	|jd
<|jdjt�|jd<|jj	|j�|j
|_n|jd|7<dS)NrUrjr[r"zinvalid-codepoint)r#r$u�zeof-in-doctype-nameFr�T)rr6rrrXr�afterDoctypeNameStaterr%r7rrr�r)rr$r r r!r�6s,







zHTMLTokenizer.doctypeNameStatecCsR|jj�}|tkr�n8|dkr8|jj|j�|j|_�n|tkr�d|jd<|jj	|�|jjt
ddd��|jj|j�|j|_�n�|d!kr�d	}x$d'D]}|jj�}||kr�d}Pq�W|r�|j|_d	SnJ|d(k�rd	}x(d.D] }|jj�}||k�r�d}P�q�W|�r|j|_d	S|jj	|�|jjt
ddd|id ��d|jd<|j
|_d	S)/NrjFr�r"zeof-in-doctype)r#r$r�r�T�u�U�b�B�l�L�i�Ir@r��s�Sr�r�r�r�r�r��m�Mz*expected-space-or-right-bracket-in-doctyper$)r#r$r.)r�r��r�r��r�r��r�r��r�r��r@r�)r�r�r�r�r�)r�r��r�r��r�r��r�r��r�r��r�r�)r�r�r�r�r�)rr6rr%r7rrrrr=r�afterDoctypePublicKeywordState�afterDoctypeSystemKeywordState�bogusDoctypeState)rr$r�r�r r r!r�OsT







z#HTMLTokenizer.afterDoctypeNameStatecCs�|jj�}|tkr|j|_n�|d
krP|jjtddd��|jj|�|j|_nT|t	kr�|jjtddd��d|j
d<|jj|j
�|j|_n|jj|�|j|_d	S)Nr�r�r"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFr�T)r�r�)rr6r�"beforeDoctypePublicIdentifierStaterr%r7rr=rrr)rr$r r r!r��s"






z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs�|jj�}|tkrn�|dkr0d|jd<|j|_n�|dkrLd|jd<|j|_n�|dkr�|jjt	ddd��d	|jd
<|jj|j�|j
|_nh|tkr�|jjt	ddd��d	|jd
<|jj|j�|j
|_n(|jjt	ddd��d	|jd
<|j|_d
S)Nr�r,r�r�rjr"zunexpected-end-of-doctype)r#r$Fr�zeof-in-doctypezunexpected-char-in-doctypeT)
rr6rr�(doctypePublicIdentifierDoubleQuotedStater�(doctypePublicIdentifierSingleQuotedStater%r7rrrr�)rr$r r r!r��s4












z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<n�|dkr�|jjtdd	d��d
|jd<|jj|j�|j|_nR|t	kr�|jjtddd��d
|jd<|jj|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6�!afterDoctypePublicIdentifierStaterr%r7rrrr)rr$r r r!r��s*








z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<n�|dkr�|jjtdd	d��d
|jd<|jj|j�|j|_nR|t	kr�|jjtddd��d
|jd<|jj|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6r�rr%r7rrrr)rr$r r r!r��s*








z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs|jj�}|tkr|j|_n�|dkr<|jj|j�|j|_n�|dkrn|jjt	ddd��d|jd<|j
|_n�|dkr�|jjt	ddd��d|jd<|j|_nh|tkr�|jjt	dd	d��d
|jd<|jj|j�|j|_n(|jjt	ddd��d
|jd<|j
|_dS)
Nrjr�r"zunexpected-char-in-doctype)r#r$r,r�r�zeof-in-doctypeFr�T)rr6r�-betweenDoctypePublicAndSystemIdentifiersStaterr%r7rrr�(doctypeSystemIdentifierDoubleQuotedState�(doctypeSystemIdentifierSingleQuotedStaterr�)rr$r r r!r��s6













z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs�|jj�}|tkrn�|dkr4|jj|j�|j|_n�|dkrPd|jd<|j|_n�|dkrld|jd<|j	|_nh|t
kr�|jjtddd��d	|jd
<|jj|j�|j|_n(|jjtddd��d	|jd
<|j|_dS)
Nrjr�r,r�r�r"zeof-in-doctype)r#r$Fr�zunexpected-char-in-doctypeT)
rr6rr%r7rrrr�r�rrr�)rr$r r r!r�s.










z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs�|jj�}|tkr|j|_n�|d
krP|jjtddd��|jj|�|j|_nT|t	kr�|jjtddd��d|j
d<|jj|j
�|j|_n|jj|�|j|_d	S)Nr�r�r"zunexpected-char-in-doctype)r#r$zeof-in-doctypeFr�T)r�r�)rr6r�"beforeDoctypeSystemIdentifierStaterr%r7rr=rrr)rr$r r r!r�s"






z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs�|jj�}|tkrn�|dkr0d|jd<|j|_n�|dkrLd|jd<|j|_n�|dkr�|jjt	ddd��d	|jd
<|jj|j�|j
|_nh|tkr�|jjt	ddd��d	|jd
<|jj|j�|j
|_n(|jjt	ddd��d	|jd
<|j|_dS)
Nr�r,r�r�rjr"zunexpected-char-in-doctype)r#r$Fr�zeof-in-doctypeT)
rr6rrr�rr�r%r7rrrr�)rr$r r r!r�/s4












z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<n�|dkr�|jjtdd	d��d
|jd<|jj|j�|j|_nR|t	kr�|jjtddd��d
|jd<|jj|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6�!afterDoctypeSystemIdentifierStaterr%r7rrrr)rr$r r r!r�Ls*








z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs�|jj�}|dkr|j|_n�|dkrN|jjtddd��|jdd7<n�|dkr�|jjtdd	d��d
|jd<|jj|j�|j|_nR|t	kr�|jjtddd��d
|jd<|jj|j�|j|_n|jd|7<d
S)Nr�r[r"zinvalid-codepoint)r#r$r�u�rjzunexpected-end-of-doctypeFr�zeof-in-doctypeT)
rr6r�rr%r7rrrr)rr$r r r!r�ds*








z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs�|jj�}|tkrn~|dkr4|jj|j�|j|_n^|tkrt|jjt	ddd��d|jd<|jj|j�|j|_n|jjt	ddd��|j
|_dS)	Nrjr"zeof-in-doctype)r#r$Fr�zunexpected-char-in-doctypeT)rr6rr%r7rrrrrr�)rr$r r r!r�|s 





z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|jj�}|dkr*|jj|j�|j|_n,|tkrV|jj|�|jj|j�|j|_ndS)NrjT)	rr6r%r7rrrrr=)rr$r r r!r��s


zHTMLTokenizer.bogusDoctypeStatecCs�g}xt|j|jjd��|j|jjd��|jj�}|tkr@Pq|ddd�dkrl|ddd�|d<Pq|j|�qWdj|�}|jd�}|dkr�x&t|�D]}|jjt	d	d
d��q�W|j
dd�}|r�|jjt	d
|d��|j|_dS)N�]rjr�z]]r,r[rr"zinvalid-codepoint)r#r$u�rJTrK���rKr�rK)
r7rr^r6rr9�count�ranger%rr�rr)rr$r6Z	nullCountr�r r r!r��s.



zHTMLTokenizer.cdataSectionState)N)NF)N�__name__�
__module__�__qualname__�__doc__rr)rBrSrTrZrr\rbr`rdrfrgr]rmrnrarsrtrcrwrxreryr{rzr}r�rr~r�r�r�r�r�r�r�r�r�rpr�r�r�r�r�r�r�rqrorlr�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r��
__classcell__r r )rr!rs�H
P#

6 "-3rN)Z
__future__rrrZsixrr;�collectionsrZ	constantsrr	r
rrr
rrrrZ_inputstreamrZ_trierrL�objectrr r r r!�<module>s

Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists