DomainPart.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. <?php
  2. namespace Egulias\EmailValidator\Parser;
  3. use Egulias\EmailValidator\EmailLexer;
  4. use Egulias\EmailValidator\Exception\CharNotAllowed;
  5. use Egulias\EmailValidator\Exception\CommaInDomain;
  6. use Egulias\EmailValidator\Exception\ConsecutiveAt;
  7. use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
  8. use Egulias\EmailValidator\Exception\CRNoLF;
  9. use Egulias\EmailValidator\Exception\DomainHyphened;
  10. use Egulias\EmailValidator\Exception\DotAtEnd;
  11. use Egulias\EmailValidator\Exception\DotAtStart;
  12. use Egulias\EmailValidator\Exception\ExpectingATEXT;
  13. use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
  14. use Egulias\EmailValidator\Exception\ExpectingDTEXT;
  15. use Egulias\EmailValidator\Exception\NoDomainPart;
  16. use Egulias\EmailValidator\Exception\UnopenedComment;
  17. use Egulias\EmailValidator\Warning\AddressLiteral;
  18. use Egulias\EmailValidator\Warning\CFWSWithFWS;
  19. use Egulias\EmailValidator\Warning\DeprecatedComment;
  20. use Egulias\EmailValidator\Warning\DomainLiteral;
  21. use Egulias\EmailValidator\Warning\DomainTooLong;
  22. use Egulias\EmailValidator\Warning\IPV6BadChar;
  23. use Egulias\EmailValidator\Warning\IPV6ColonEnd;
  24. use Egulias\EmailValidator\Warning\IPV6ColonStart;
  25. use Egulias\EmailValidator\Warning\IPV6Deprecated;
  26. use Egulias\EmailValidator\Warning\IPV6DoubleColon;
  27. use Egulias\EmailValidator\Warning\IPV6GroupCount;
  28. use Egulias\EmailValidator\Warning\IPV6MaxGroups;
  29. use Egulias\EmailValidator\Warning\LabelTooLong;
  30. use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
  31. use Egulias\EmailValidator\Warning\TLD;
  /**
   * Parser for the domain part of an e-mail address.
   *
   * Walks the tokens supplied by the lexer, throws one of the imported
   * exceptions as soon as an invalid construct is met and records non-fatal
   * findings in $this->warnings, keyed by the warning's CODE constant.
   */
  class DomainPart extends Parser
  {
      const DOMAIN_MAX_LENGTH = 254;

      /**
       * Domain string assembled by the last successful call to parse().
       *
       * @var string
       */
      protected $domainPart = '';

      /**
       * Parses the domain from the current lexer position and stores the result.
       *
       * @param string $domainPart Not read by this method; the tokens are taken
       *                           from the lexer.
       *
       * @throws DotAtEnd        when the last consumed token is a dot
       * @throws DomainHyphened  when the last consumed token is a hyphen
       * @throws CRLFAtTheEnd    when the last consumed token is a CR
       */
      public function parse($domainPart)
      {
          $this->lexer->moveNext();

          $this->performDomainStartChecks();

          $domain = $this->doParseDomainPart();

          $prev = $this->lexer->getPrevious();
          $length = strlen($domain);

          if ($prev['type'] === EmailLexer::S_DOT) {
              throw new DotAtEnd();
          }

          if ($prev['type'] === EmailLexer::S_HYPHEN) {
              throw new DomainHyphened();
          }

          // An over-long domain is only reported as a warning, never as an error.
          if ($length > self::DOMAIN_MAX_LENGTH) {
              $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
          }

          if ($prev['type'] === EmailLexer::S_CR) {
              throw new CRLFAtTheEnd();
          }

          $this->domainPart = $domain;
      }

      /**
       * Runs the checks that apply to the very first token of the domain and
       * consumes a leading comment, if there is one.
       */
      private function performDomainStartChecks()
      {
          $this->checkInvalidTokensAfterAT();
          $this->checkEmptyDomain();

          if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
              $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
              $this->parseDomainComments();
          }
      }

      /**
       * Rejects a domain that is empty, or that is a space not followed by a
       * GENERIC token.
       *
       * @throws NoDomainPart
       */
      private function checkEmptyDomain()
      {
          $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
              ($this->lexer->token['type'] === EmailLexer::S_SP &&
              !$this->lexer->isNextToken(EmailLexer::GENERIC));

          if ($thereIsNoDomain) {
              throw new NoDomainPart();
          }
      }

      /**
       * Rejects a domain whose first token is a dot or a hyphen.
       *
       * @throws DotAtStart
       * @throws DomainHyphened
       */
      private function checkInvalidTokensAfterAT()
      {
          if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
              throw new DotAtStart();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
              throw new DomainHyphened();
          }
      }

      /**
       * @return string The domain stored by parse(); '' if nothing was stored yet.
       */
      public function getDomainPart()
      {
          return $this->domainPart;
      }

      /**
       * Inspects the IPv6 portion of an address literal and records IPV6*
       * warnings for every irregularity found. Never throws.
       *
       * @param string $addressLiteral Literal whose first five characters are
       *                               skipped (presumably the "IPv6:" tag).
       * @param int    $maxGroups      Number of colon-separated groups expected
       *                               when no "::" is present.
       */
      public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
      {
          $prev = $this->lexer->getPrevious();
          if ($prev['type'] === EmailLexer::S_COLON) {
              $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
          }

          $IPv6 = substr($addressLiteral, 5);
          //Daniel Marschall's new IPv6 testing strategy
          $matchesIP = explode(':', $IPv6);
          $groupCount = count($matchesIP);
          $colons = strpos($IPv6, '::');

          // Any group that is not 0-4 hexadecimal digits is a bad character warning.
          if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
              $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
          }

          if ($colons === false) {
              // We need exactly the right number of groups
              if ($groupCount !== $maxGroups) {
                  $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
              }
              return;
          }

          // First and last occurrence differ: "::" appears more than once.
          if ($colons !== strrpos($IPv6, '::')) {
              $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
              return;
          }

          if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
              // RFC 4291 allows :: at the start or end of an address
              //with 7 other groups in addition
              ++$maxGroups;
          }

          if ($groupCount > $maxGroups) {
              $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
          } elseif ($groupCount === $maxGroups) {
              $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
          }
      }

      /**
       * Consumes tokens until the lexer is exhausted, validating each one and
       * concatenating the token values into the domain string.
       *
       * @return string
       *
       * @throws UnopenedComment when a ")" is met with no comment open
       */
      protected function doParseDomainPart()
      {
          $domain = '';
          $openedParenthesis = 0;
          do {
              $prev = $this->lexer->getPrevious();

              $this->checkNotAllowedChars($this->lexer->token);

              if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
                  $this->parseComments();
                  $openedParenthesis += $this->getOpenedParenthesis();
                  $this->lexer->moveNext();
                  $tmpPrev = $this->lexer->getPrevious();
                  // The token just stepped over closed the comment again.
                  if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                      $openedParenthesis--;
                  }
              }

              if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
                  if ($openedParenthesis === 0) {
                      throw new UnopenedComment();
                  } else {
                      $openedParenthesis--;
                  }
              }

              $this->checkConsecutiveDots();
              $this->checkDomainPartExceptions($prev);

              if ($this->hasBrackets()) {
                  $this->parseDomainLiteral();
              }

              $this->checkLabelLength($prev);

              if ($this->isFWS()) {
                  $this->parseFWS();
              }

              $domain .= $this->lexer->token['value'];
              $this->lexer->moveNext();
          } while (null !== $this->lexer->token['type']);

          return $domain;
      }

      /**
       * Rejects backslash and slash tokens.
       *
       * @param array $token Lexer token; only its 'type' entry is read.
       *
       * @throws CharNotAllowed
       */
      private function checkNotAllowedChars(array $token)
      {
          $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH => true];
          if (isset($notAllowed[$token['type']])) {
              throw new CharNotAllowed();
          }
      }

      /**
       * Records an IPV6ColonStart warning when the literal starts with a colon
       * (directly, or right after the IPv6 tag as a double colon) and then
       * delegates to doParseDomainLiteral().
       *
       * @return string|false
       */
      protected function parseDomainLiteral()
      {
          if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
              $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
          }

          if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
              // Peek one token further on a copy so the real lexer does not advance.
              $lexer = clone $this->lexer;
              $lexer->moveNext();
              if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
                  $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
              }
          }

          return $this->doParseDomainLiteral();
      }

      /**
       * Reads the tokens of a domain literal, collecting their values, and
       * classifies the result as a plain domain literal or an address literal.
       *
       * @return string|false The collected literal (with any IPv4 tail rewritten
       *                      by checkIPV4Tag()), or false when checkIPV4Tag()
       *                      returns false.
       *
       * @throws ExpectingDTEXT on a NUL token or when the next token opens
       *                        another bracket
       * @throws CRNoLF         when the next token is a CR
       */
      protected function doParseDomainLiteral()
      {
          $IPv6TAG = false;
          $addressLiteral = '';
          do {
              if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
                  throw new ExpectingDTEXT();
              }

              if ($this->lexer->token['type'] === EmailLexer::INVALID ||
                  $this->lexer->token['type'] === EmailLexer::C_DEL ||
                  $this->lexer->token['type'] === EmailLexer::S_LF
              ) {
                  $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
              }

              if ($this->lexer->isNextTokenAny([EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET])) {
                  throw new ExpectingDTEXT();
              }

              // The list must contain token types. It used to carry the boolean
              // result of comparing the current token with CRLF, which can never
              // equal a token type, so an upcoming CRLF was silently ignored here.
              if ($this->lexer->isNextTokenAny(
                  [EmailLexer::S_HTAB, EmailLexer::S_SP, EmailLexer::CRLF]
              )) {
                  $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
                  $this->parseFWS();
              }

              if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
                  throw new CRNoLF();
              }

              if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
                  $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
                  $addressLiteral .= $this->lexer->token['value'];
                  $this->lexer->moveNext();
                  $this->validateQuotedPair();
              }

              if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
                  $IPv6TAG = true;
              }

              // NOTE(review): hasBrackets() searches for S_CLOSEBRACKET while this
              // loop stops on S_CLOSEQBRACKET - confirm against EmailLexer's token
              // map that this is the intended terminator.
              if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
                  break;
              }

              $addressLiteral .= $this->lexer->token['value'];
          } while ($this->lexer->moveNext());

          $addressLiteral = str_replace('[', '', $addressLiteral);
          $addressLiteral = $this->checkIPV4Tag($addressLiteral);

          if (false === $addressLiteral) {
              return $addressLiteral;
          }

          if (!$IPv6TAG) {
              $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
              return $addressLiteral;
          }

          $this->warnings[AddressLiteral::CODE] = new AddressLiteral();

          $this->checkIPV6Tag($addressLiteral);

          return $addressLiteral;
      }

      /**
       * Looks for a dotted-quad IPv4 address at the end of the literal.
       *
       * When the IPv4 address is found at offset 0, an AddressLiteral warning is
       * recorded and false is returned. When it is found further in, that tail
       * is replaced by "0:0" so the remainder can be checked as IPv6. Without a
       * match the input is returned unchanged.
       *
       * @param string $addressLiteral
       *
       * @return string|false
       */
      protected function checkIPV4Tag($addressLiteral)
      {
          $matchesIP = [];

          // Extract IPv4 part from the end of the address-literal (if there is one)
          if (preg_match(
              '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
              $addressLiteral,
              $matchesIP
          ) > 0
          ) {
              $index = strrpos($addressLiteral, $matchesIP[0]);
              if ($index === 0) {
                  $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
                  return false;
              }
              // Convert IPv4 part to IPv6 format for further testing
              $addressLiteral = substr($addressLiteral, 0, (int) $index) . '0:0';
          }

          return $addressLiteral;
      }

      /**
       * Throws for tokens that may not appear at the current position of a
       * domain.
       *
       * @param array $prev Previously consumed lexer token; only 'type' is read.
       *
       * @throws ExpectingATEXT on a double quote, semicolon, "<" or ">", on
       *                        S_OPENQBRACKET not preceded by "@", or on a
       *                        backslash followed by a GENERIC token
       * @throws CommaInDomain  on a comma
       * @throws ConsecutiveAt  on an "@"
       * @throws DomainHyphened on a hyphen directly followed by a dot
       */
      protected function checkDomainPartExceptions(array $prev)
      {
          $invalidDomainTokens = [
              EmailLexer::S_DQUOTE => true,
              EmailLexer::S_SEMICOLON => true,
              EmailLexer::S_GREATERTHAN => true,
              EmailLexer::S_LOWERTHAN => true,
          ];

          if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
              throw new ExpectingATEXT();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
              throw new CommaInDomain();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_AT) {
              throw new ConsecutiveAt();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
              throw new ExpectingATEXT();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
              throw new DomainHyphened();
          }

          if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
              && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
              throw new ExpectingATEXT();
          }
      }

      /**
       * Tells whether the current token opens a bracketed literal that is also
       * closed later in the input.
       *
       * @return bool False when the current token is not S_OPENBRACKET.
       *
       * @throws ExpectingDomainLiteralClose when the lexer's find() raises a
       *                                     RuntimeException while looking for
       *                                     S_CLOSEBRACKET
       */
      protected function hasBrackets()
      {
          if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
              return false;
          }

          try {
              $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
          } catch (\RuntimeException $e) {
              throw new ExpectingDomainLiteralClose();
          }

          return true;
      }

      /**
       * Records a LabelTooLong warning when a dot terminates a GENERIC token
       * whose value is longer than 63 bytes.
       *
       * @param array $prev Previously consumed lexer token ('type' and 'value').
       */
      protected function checkLabelLength(array $prev)
      {
          if ($this->lexer->token['type'] === EmailLexer::S_DOT &&
              $prev['type'] === EmailLexer::GENERIC &&
              strlen($prev['value']) > 63
          ) {
              $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
          }
      }

      /**
       * Skips over a comment at the start of the domain, up to and including
       * its closing parenthesis.
       *
       * @throws ExpectingATEXT when a dot directly follows the closing parenthesis
       */
      protected function parseDomainComments()
      {
          $this->isUnclosedComment();
          while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
              $this->warnEscaping();
              $this->lexer->moveNext();
          }

          $this->lexer->moveNext();
          if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
              throw new ExpectingATEXT();
          }
      }

      /**
       * Adds a TLD warning when a DomainLiteral warning has been recorded.
       */
      protected function addTLDWarnings()
      {
          // empty() keeps the original truthiness test but does not raise an
          // "undefined index" diagnostic when no DomainLiteral warning exists.
          if (!empty($this->warnings[DomainLiteral::CODE])) {
              $this->warnings[TLD::CODE] = new TLD();
          }
      }
  }