Skip to content

Commit

Permalink
Merge pull request #31 from egulias/18-discrep-isemail
Browse files Browse the repository at this point in the history
#18 Differences with isemail
  • Loading branch information
egulias committed Oct 26, 2014
2 parents 43650e8 + 8b62522 commit 49494c3
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 13 deletions.
65 changes: 60 additions & 5 deletions src/Egulias/EmailValidator/EmailLexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ class EmailLexer extends AbstractLexer
const S_SEMICOLON = 275;
const S_OPENQBRACKET = 276;
const S_CLOSEQBRACKET = 277;
const S_SLASH = 278;
const S_EMPTY = null;
const GENERIC = 300;
const CRLF = 301;
const INVALID = 302;
const ASCII_INVALID_FROM = 127;
const ASCII_INVALID_TO = 199;

/**
* US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
Expand All @@ -52,6 +55,7 @@ class EmailLexer extends AbstractLexer
';' => self::S_SEMICOLON,
'@' => self::S_AT,
'\\' => self::S_BACKSLASH,
'/' => self::S_SLASH,
',' => self::S_COMMA,
'.' => self::S_DOT,
'"' => self::S_DQUOTE,
Expand All @@ -67,14 +71,31 @@ class EmailLexer extends AbstractLexer
'>' => self::S_GREATERTHAN,
'{' => self::S_OPENQBRACKET,
'}' => self::S_CLOSEQBRACKET,
'' => self::S_EMPTY
'' => self::S_EMPTY,
'\0' => self::C_NUL,
);

protected $invalidASCII = array(226 => 1,);

protected $hasInvalidTokens = false;

protected $previous;

/**
 * Clears the invalid-token flag, then delegates to the parent lexer's reset().
 */
public function reset()
{
$this->hasInvalidTokens = false;
parent::reset();
}

/**
 * Reports the invalid-token flag. getType() raises it when it classifies a
 * value as INVALID; reset() lowers it again.
 *
 * @return boolean current value of $hasInvalidTokens
 */
public function hasInvalidTokens()
{
return $this->hasInvalidTokens;
}

/**
* @param $type
* @throws \UnexpectedValueException
* @return boolean
*/
public function find($type)
{
Expand All @@ -100,7 +121,7 @@ public function getPrevious()
/**
* moveNext
*
* @return mixed
* @return boolean
*/
public function moveNext()
{
Expand All @@ -112,7 +133,7 @@ public function moveNext()
/**
* Lexical catchable patterns.
*
* @return array
* @return string[]
*/
protected function getCatchablePatterns()
{
Expand All @@ -130,7 +151,7 @@ protected function getCatchablePatterns()
/**
* Lexical non-catchable patterns.
*
* @return array
* @return string[]
*/
protected function getNonCatchablePatterns()
{
Expand All @@ -146,14 +167,48 @@ protected function getNonCatchablePatterns()
*/
protected function getType(&$value)
{

// A value that is exactly the NUL character gets its own token type.
if ($this->isNullType($value)) {
return self::C_NUL;
}

// Values present as keys of the $charValue map resolve to the type stored there.
if (isset($this->charValue[$value])) {
return $this->charValue[$value];
}

// NOTE(review): the next two lines are both `if` openers but only one closing
// brace follows before the final return. This looks like a replaced
// preg_match() line displayed next to its replacement (the same pattern now
// lives in isInvalid()) - confirm against the real file.
if (preg_match('/[\x10-\x1F]+/', $value)) {
if ($this->isInvalid($value)) {
// Remember on the lexer that an invalid token was seen, then report INVALID.
$this->hasInvalidTokens = true;
return self::INVALID;
}

// Everything that matched none of the checks above is a generic token.
return self::GENERIC;
}

/**
 * Tells whether the given value is exactly the one-character NUL string.
 *
 * @param string $value
 * @return boolean true only when $value === "\0"
 */
protected function isNullType($value)
{
    return $value === "\0";
}

/**
 * Decides whether a value must be treated as an invalid token.
 *
 * @param string $value
 * @return boolean true when $value contains a byte in the range \x10-\x1F, or
 *                 when the byte value of its first character is a key of
 *                 $invalidASCII; false otherwise
 */
protected function isInvalid($value)
{
    $hasControlByte = (bool) preg_match('/[\x10-\x1F]+/', $value);

    // Short-circuit keeps the lookup from running once the pattern has matched.
    return $hasControlByte || isset($this->invalidASCII[ord($value)]);
}
}
11 changes: 11 additions & 0 deletions src/Egulias/EmailValidator/EmailParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ public function __construct(EmailLexer $lexer)
$this->domainPartParser = new DomainPart($this->lexer);
}

/**
* @param string $str
*/
public function parse($str)
{
$this->lexer->setInput($str);
Expand All @@ -36,6 +39,10 @@ public function parse($str)
throw new \InvalidArgumentException('ERR_NOLOCALPART');
}

if ($this->lexer->hasInvalidTokens()) {
throw new \InvalidArgumentException('ERR_INVALID_ATEXT');
}

$this->localPartParser->parse($str);
$this->domainPartParser->parse($str);

Expand Down Expand Up @@ -78,6 +85,10 @@ protected function hasAtToken()
return true;
}

/**
* @param string $localPart
* @param string $parsedDomainPart
*/
protected function addLongEmailWarning($localPart, $parsedDomainPart)
{
if (strlen($localPart . '@' . $parsedDomainPart) > self::EMAIL_MAX_LENGTH) {
Expand Down
10 changes: 10 additions & 0 deletions src/Egulias/EmailValidator/Parser/DomainPart.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ public function parse($domainPart)
if ($this->lexer->token['type'] === EmailLexer::S_EMPTY) {
throw new \InvalidArgumentException('ERR_NODOMAIN');
}
if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
throw new \InvalidArgumentException('ERR_DOMAINHYPHENEND');
}

if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
$this->warnings[] = EmailValidator::DEPREC_COMMENT;
Expand Down Expand Up @@ -103,6 +106,10 @@ protected function doParseDomainPart()
do {
$prev = $this->lexer->getPrevious();

if ($this->lexer->token['type'] === EmailLexer::S_SLASH) {
throw new \InvalidArgumentException('ERR_DOMAIN_CHAR_NOT_ALLOWED');
}

if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
$this->parseComments();
$this->lexer->moveNext();
Expand Down Expand Up @@ -210,6 +217,9 @@ protected function doParseDomainLiteral()
return $addressLiteral;
}

/**
* @param string $addressLiteral
*/
protected function checkIPV4Tag($addressLiteral)
{
$matchesIP = array();
Expand Down
36 changes: 32 additions & 4 deletions src/Egulias/EmailValidator/Parser/LocalPart.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Egulias\EmailValidator\EmailLexer;
use Egulias\EmailValidator\EmailValidator;
use \InvalidArgumentException;


class LocalPart extends Parser
Expand All @@ -21,8 +22,7 @@ public function parse($localPart)

$closingQuote = $this->checkDQUOTE($closingQuote);
if ($closingQuote && $parseDQuote) {
$this->parseDoubleQuote();
$parseDQuote = false;
$parseDQuote = $this->parseDoubleQuote();
}

if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
Expand Down Expand Up @@ -56,23 +56,51 @@ public function parse($localPart)

/**
 * Advances the lexer through a double-quoted section, looping until the
 * current token is S_DQUOTE or is falsy.
 *
 * Adds a single CFWS_FWS warning the first time a CR, HTAB or LF token is the
 * current token, and rejects a NUL, HTAB, CR or LF token reached after
 * advancing unless escaped() reports it as escaped.
 *
 * NOTE(review): this listing seems to interleave removed and added diff lines
 * (the unterminated `$special = array (` opener and the extra `}` right after
 * the while loop) - confirm against the real file before relying on it.
 *
 * @throws \InvalidArgumentException with message ERR_EXPECTED_ATEXT,
 *                                   ERR_UNCLOSED_DQUOTE or ERR_EXPECED_AT
 * @return boolean true when the loop body never ran, false once it ran at least once
 */
protected function parseDoubleQuote()
{
$special = array (
// Stays true only if the while loop below never executes its body.
$parseAgain = true;
// Token types that trigger the CFWS_FWS warning.
$special = array(
EmailLexer::S_CR => true,
EmailLexer::S_HTAB => true,
EmailLexer::S_LF => true
);

// Token types that are rejected when not escaped.
$invalid = array(
EmailLexer::C_NUL => true,
EmailLexer::S_HTAB => true,
EmailLexer::S_CR => true,
EmailLexer::S_LF => true
);
// One-shot latch so the warning is appended at most once per call.
$setSpecialsWarning = true;

$this->lexer->moveNext();

while ($this->lexer->token['type'] !== EmailLexer::S_DQUOTE && $this->lexer->token) {
$parseAgain = false;
if (isset($special[$this->lexer->token['type']]) && $setSpecialsWarning) {
$this->warnings[] = EmailValidator::CFWS_FWS;
$setSpecialsWarning = false;
}

$this->lexer->moveNext();

// Checked against the token reached by the moveNext() just above.
if (!$this->escaped() && isset($invalid[$this->lexer->token['type']])) {
throw new InvalidArgumentException("ERR_EXPECTED_ATEXT");
}
}
}

$prev = $this->lexer->getPrevious();

// When the previous token is a backslash, checkDQUOTE(false) must return a truthy value.
if ($prev['type'] === EmailLexer::S_BACKSLASH) {
if (!$this->checkDQUOTE(false)) {
throw new \InvalidArgumentException("ERR_UNCLOSED_DQUOTE");
}
}

// Unless the previous token was a backslash, the next token has to be S_AT.
// NOTE(review): "EXPECED" looks like a typo for "EXPECTED"; the message is
// runtime text, so check that nothing matches on it before renaming.
if (!$this->lexer->isNextToken(EmailLexer::S_AT) && $prev['type'] !== EmailLexer::S_BACKSLASH) {
throw new \InvalidArgumentException("ERR_EXPECED_AT");
}

return $parseAgain;
}

protected function isInvalidToken($token, $closingQuote)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Egulias/EmailValidator/Parser/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,7 @@ protected function escaped()

if ($previous['type'] === EmailLexer::S_BACKSLASH
&&
($this->lexer->token['type'] === EmailLexer::S_SP ||
$this->lexer->token['type'] === EmailLexer::S_HTAB)
$this->lexer->token['type'] !== EmailLexer::GENERIC
) {
return true;
}
Expand Down Expand Up @@ -164,6 +163,7 @@ protected function checkDQUOTE($hasClosingQuote)
if ($this->lexer->isNextToken(EmailLexer::GENERIC) && $previous['type'] === EmailLexer::GENERIC) {
throw new \InvalidArgumentException('ERR_EXPECTING_ATEXT');
}

$this->warnings[] = EmailValidator::RFC5321_QUOTEDSTRING;
try {
$this->lexer->find(EmailLexer::S_DQUOTE);
Expand Down
14 changes: 13 additions & 1 deletion tests/egulias/Tests/EmailValidator/EmailLexerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ public function testLexerSearchToken()
$this->assertTrue($lexer->find(EmailLexer::S_HTAB));
}

/**
 * A lexer fed the single byte 226 must report invalid tokens after advancing twice.
 */
public function testLexerHasInvalidTokens()
{
    $invalidByte = chr(226);

    $lexer = new EmailLexer();
    $lexer->setInput($invalidByte);
    $lexer->moveNext();
    $lexer->moveNext();

    $this->assertTrue($lexer->hasInvalidTokens());
}

public function getTokens()
{
return array(
Expand All @@ -61,6 +70,7 @@ public function getTokens()
array("\"", EmailLexer::S_DQUOTE),
array("-", EmailLexer::S_HYPHEN),
array("\\", EmailLexer::S_BACKSLASH),
array("/", EmailLexer::S_SLASH),
array("(", EmailLexer::S_OPENPARENTHESIS),
array(")", EmailLexer::S_CLOSEPARENTHESIS),
array('<', EmailLexer::S_LOWERTHAN),
Expand All @@ -74,7 +84,9 @@ public function getTokens()
array('{', EmailLexer::S_OPENQBRACKET),
array('}', EmailLexer::S_CLOSEQBRACKET),
array('', EmailLexer::S_EMPTY),
array(chr(31), EmailLexer::INVALID)
array(chr(31), EmailLexer::INVALID),
array(chr(226), EmailLexer::INVALID),
array(chr(0), EmailLexer::C_NUL)
);
}
}
28 changes: 27 additions & 1 deletion tests/egulias/Tests/EmailValidator/EmailValidatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public function getValidEmails()
array('"user,name"@example.com'),
array('"user name"@example.com'),
array('"user@name"@example.com'),
array('"\a"@iana.org'),
array('"test\ test"@iana.org'),
array('""@iana.org'),
array('"\""@iana.org'),
);
}

Expand All @@ -57,9 +61,10 @@ public function testInvalidEmails($email)
public function getInvalidEmails()
{
return array(

array('[email protected]'),
array('example@[email protected]'),
array('(fabien_potencier@example.fr)'),
array('(test_exampel@example.fr)'),
array('example(example)[email protected]'),
array('.example@localhost'),
array('ex\ample@localhost'),
Expand All @@ -72,6 +77,27 @@ public function getInvalidEmails()
array('username@example,com'),
array('usern,[email protected]'),
array('user[na][email protected]'),
array('"""@iana.org'),
array('"\"@iana.org'),
array('"test"[email protected]'),
array('"test""test"@iana.org'),
array('"test"."test"@iana.org'),
array('"test"[email protected]'),
array('"test"' . chr(0) . '@iana.org'),
array('"test\"@iana.org'),
array(chr(226) . '@iana.org'),
array('test@' . chr(226) . '.org'),
array('\r\[email protected]'),
array('\r\n [email protected]'),
array('\r\n \r\[email protected]'),
array('\r\n \r\[email protected]'),
array('\r\n \r\n [email protected]'),
array('[email protected] \r\n'),
array('[email protected] \r\n '),
array('[email protected] \r\n \r\n'),
array('[email protected] \r\n\r\n'),
array('[email protected] \r\n\r\n '),
array('test@iana/icann.org'),
);
}

Expand Down

0 comments on commit 49494c3

Please sign in to comment.