123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279 |
- <?php
- namespace Egulias\EmailValidator;
- use Doctrine\Common\Lexer\AbstractLexer;
/**
 * Lexer that splits an email address into typed tokens.
 *
 * Builds on Doctrine's AbstractLexer: the patterns returned by
 * getCatchablePatterns() decide how the input is cut into tokens, and
 * getType() assigns one of the constants below to every token. On top of the
 * parent behaviour this class remembers the previously consumed token and
 * records whether any token was classified as INVALID.
 */
class EmailLexer extends AbstractLexer
{
    // Token type identifiers.
    //
    // Only C_DEL, C_NUL, S_AT, S_BACKSLASH, S_DOT and S_DQUOTE coincide with
    // the ASCII code of the character they name. Every other value is an
    // arbitrary identifier; in particular S_OPENPARENTHESIS is 49 although
    // the ASCII code of '(' is 40. The values are part of the public API and
    // are kept unchanged for backward compatibility.
    const C_DEL              = 127;
    const C_NUL              = 0;
    const S_AT               = 64;
    const S_BACKSLASH        = 92;
    const S_DOT              = 46;
    const S_DQUOTE           = 34;
    const S_OPENPARENTHESIS  = 49;
    const S_CLOSEPARENTHESIS = 261;
    const S_OPENBRACKET      = 262;
    const S_CLOSEBRACKET     = 263;
    const S_HYPHEN           = 264;
    const S_COLON            = 265;
    const S_DOUBLECOLON      = 266;
    const S_SP               = 267;
    const S_HTAB             = 268;
    const S_CR               = 269;
    const S_LF               = 270;
    const S_IPV6TAG          = 271;
    const S_LOWERTHAN        = 272;
    const S_GREATERTHAN      = 273;
    const S_COMMA            = 274;
    const S_SEMICOLON        = 275;
    const S_OPENQBRACKET     = 276;
    const S_CLOSEQBRACKET    = 277;
    const S_SLASH            = 278;
    const S_EMPTY            = null;
    const GENERIC            = 300;
    const CRLF               = 301;
    const INVALID            = 302;

    // Not referenced inside this class; kept because they are public constants.
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO   = 199;

    /**
     * Maps the literal text of a token to its type constant.
     *
     * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
     *
     * @var array
     */
    protected $charValue = [
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '{'    => self::S_OPENQBRACKET,
        '}'    => self::S_CLOSEQBRACKET,
        // S_EMPTY is null, so the isset() lookup in isValid() treats this
        // entry as absent.
        ''     => self::S_EMPTY,
        // Double quotes are required: in a single-quoted string \0 is the
        // two characters backslash and zero, not the NUL byte.
        "\0"   => self::C_NUL,
    ];

    /**
     * Set to true as soon as getType() classifies a token as INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the last call to moveNext().
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
     */
    protected $previous = [];

    /**
     * The last matched/seen token.
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}
     */
    public $token;

    /**
     * The next token in the input.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * Placeholder used wherever no real token is available.
     *
     * @psalm-var array{value:'', type:null, position:0}
     */
    private static $nullToken = [
        'value'    => '',
        'type'     => null,
        'position' => 0,
    ];

    /**
     * Starts with the placeholder token as both current and previous token
     * and with no lookahead.
     */
    public function __construct()
    {
        $this->previous  = $this->token = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * Clears the invalid-token flag, resets the parent lexer state and puts
     * the placeholder token back as current and previous token.
     *
     * @return void
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        // parent::reset() runs first so that the placeholder assigned here
        // is what remains in $token afterwards.
        $this->previous = $this->token = self::$nullToken;
    }

    /**
     * Tells whether any token has been classified as INVALID since the last
     * reset().
     *
     * @return bool
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Looks ahead for a token of the given type.
     *
     * The search is performed with skipUntil() on a clone of this lexer, so
     * the position of this instance is not moved.
     *
     * @param int $type
     * @throws \UnexpectedValueException when the clone has no lookahead left
     *                                   after skipping
     * @return bool always true when no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type)
    {
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }

        return true;
    }

    /**
     * Returns the token that was current before the last moveNext().
     *
     * @return array
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * Advances to the next token, remembering the current one as previous.
     *
     * @return bool the value returned by the parent moveNext()
     */
    public function moveNext()
    {
        $this->previous = $this->token;
        $hasNext        = parent::moveNext();
        // Substitute the placeholder when the parent left no usable token,
        // so $token always has the value/type/position shape.
        $this->token    = $this->token ?: self::$nullToken;

        return $hasNext;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return [
            '[a-zA-Z_]+[46]?', //ASCII and domain literal
            '[^\x00-\x7F]',  //UTF-8
            '[0-9]+',
            '\r\n',
            '::',
            '\s+?',
            '.',
        ];
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return ['[\xA0-\xff]+'];
    }

    /**
     * Retrieve token type.
     *
     * Checks run in this order: the NUL byte yields C_NUL, a value known to
     * $charValue yields its mapped constant, a value rejected by
     * isUTF8Invalid() yields INVALID (and raises the invalid-token flag),
     * and anything else is GENERIC.
     *
     * @param string $value
     * @return int
     */
    protected function getType(&$value)
    {
        if ($this->isNullType($value)) {
            return self::C_NUL;
        }

        if ($this->isValid($value)) {
            return $this->charValue[$value];
        }

        if ($this->isUTF8Invalid($value)) {
            $this->hasInvalidTokens = true;
            return self::INVALID;
        }

        return self::GENERIC;
    }

    /**
     * Tells whether the value has a non-null entry in $charValue.
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isValid($value)
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly the NUL byte.
     *
     * @param string $value
     * @return bool
     */
    protected function isNullType($value)
    {
        return $value === "\0";
    }

    /**
     * Tells whether the value must be rejected: it either contains a control
     * character (Unicode category Cc) or could not be examined at all.
     *
     * preg_match() returns false on failure, for instance when the subject is
     * not well-formed UTF-8 under the /u modifier. Such a failure is reported
     * as invalid instead of being silently accepted.
     *
     * @param string $value
     * @return bool
     */
    protected function isUTF8Invalid($value)
    {
        return preg_match('/\p{Cc}+/u', $value) !== 0;
    }

    /**
     * Regex modifiers applied to the lexer patterns: case-insensitive, UTF-8.
     *
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }
}
|