Skip to content

Commit 93ac8fb

Browse files
committed
close to completing invalid syntax testing
1 parent 1aebf55 commit 93ac8fb

File tree

2 files changed

+192
-181
lines changed

2 files changed

+192
-181
lines changed

src/lib.rs

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@ lazy_static! {
2929
static ref ATEXT_HOSTNAME_INTL: Regex = Regex::new(r"^[a-zA-Z0-9\-\.\u{0080}-\u{10FFFF}]+$").unwrap();
3030
static ref HOSTNAME_LABEL: &'static str = r"(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])";
3131
static ref DOT_ATOM_TEXT_HOSTNAME: Regex = Regex::new(&format!(r"^{}(?:\.{})*$", *HOSTNAME_LABEL, *HOSTNAME_LABEL)).unwrap();
32-
static ref DOMAIN_NAME_REGEX: Regex = Regex::new(r"[A-Za-z]\Z").unwrap();
32+
static ref DOMAIN_NAME_REGEX: Regex = Regex::new(r"[A-Za-z]\z").unwrap();
3333

3434
// Domain literal (RFC 5322 3.4.1)
3535
static ref DOMAIN_LITERAL_CHARS: Regex = Regex::new(r"[\u0021-\u00FA\u005E-\u007E]").unwrap();
3636

3737
// See https://www.rfc-editor.org/rfc/rfc5321.html#section-4.1.2
3838
static ref QTEXT_INTL: Regex = Regex::new(r"[\u0020-\u007E\u0080-\u{10FFFF}]").unwrap();
39+
static ref DNS_LABEL_REGEX: Regex = Regex::new(r"(?i)^.{2}--").unwrap();
3940
}
4041

4142
const MAX_ADDRESS_LENGTH: usize = 254;
@@ -154,7 +155,7 @@ impl EmailValidator {
154155
Ok(local.to_string())
155156
} else {
156157
Err(PySyntaxError::new_err(
157-
"There needs to be something before the @-sign",
158+
"Invalid Local Part: The part before the '@' sign cannot be empty.",
158159
))
159160
};
160161
}
@@ -165,7 +166,7 @@ impl EmailValidator {
165166
// Local part length validation
166167
if unquoted_local.len() > MAX_LOCAL_PART_LENGTH {
167168
return Err(PyValueError::new_err(
168-
"The email address is too long before the @-sign",
169+
"Invalid Local Part: The part before the '@' sign exceeds the maximum length (64 chars).",
169170
));
170171
}
171172

@@ -237,36 +238,39 @@ impl EmailValidator {
237238

238239
if !invalid_chars.is_empty() {
239240
return Err(PySyntaxError::new_err(
240-
"The email address contains invalid characters before the @-sign",
241+
"Invalid Local Part: contains invalid characters before the '@' sign.",
241242
));
242243
}
243244

244-
// Check for dot errors
245-
if unquoted_local.starts_with('.')
246-
|| unquoted_local.ends_with('.')
247-
|| unquoted_local.contains("..")
248-
{
249-
return Err(PySyntaxError::new_err("The local part of the email address cannot start or end with a dot, or contain consecutive dots"));
250-
}
245+
// Validates the local part of an email address based on RFC 952, RFC 1123, and RFC 5322.
246+
// Each label must have at least one character and cannot start or end with dashes or periods.
247+
// Consecutive periods and adjacent period-hyphen combinations are also invalid.
248+
_validate_email_label(
249+
local,
250+
"Invalid Local Part: Cannot start with a {}.",
251+
"Invalid Local Part: A {} cannot immediately precede the '@' sign.",
252+
true,
253+
)?;
251254

252-
// Fallback error for unhandled cases
253255
Err(PySyntaxError::new_err(
254-
"The email address contains invalid characters before the @-sign.",
256+
"Invalid Local Part: contains invalid characters before the '@' sign.",
255257
))
256258
}
257259

258260
fn _validate_domain(&self, domain: &str) -> PyResult<ValidatedDomain> {
259261
// Guard clause if domain is being executed independently
260262
if domain.is_empty() {
261263
return Err(PySyntaxError::new_err(
262-
"There needs to be something after the @",
264+
"Invalid Domain: The part after the '@' sign cannot be empty.",
263265
));
264266
}
265267

266268
// Address Literals
267269
if domain.starts_with('[') && domain.ends_with(']') {
268270
if !self.allow_domain_literal {
269-
return Err(PyValueError::new_err("Domain Literals are not allowed"));
271+
return Err(PyValueError::new_err(
272+
"Invalid Domain: A bracketed IP address after the '@' sign is not permitted.",
273+
));
270274
}
271275

272276
let domain_literal = &domain[1..domain.len() - 1];
@@ -303,7 +307,7 @@ impl EmailValidator {
303307
// Check for invalid characters in the domain part
304308
if !ATEXT_HOSTNAME_INTL.is_match(domain.as_bytes()) {
305309
return Err(PySyntaxError::new_err(
306-
"The part after the @-sign contains invalid characters.",
310+
"Invalid Domain: Contains invalid characters after '@' sign.",
307311
));
308312
}
309313

@@ -320,7 +324,7 @@ impl EmailValidator {
320324
)
321325
.map_err(|_| {
322326
PySyntaxError::new_err(
323-
"Invalid Domain: Invalid characters after '@' sign post Unicode normalization.",
327+
"Invalid Domain: Contains invalid characters after '@' sign post Unicode normalization.",
324328
)
325329
})?;
326330

@@ -334,7 +338,7 @@ impl EmailValidator {
334338
// Validates the domain part of an email address based on RFC 952, RFC 1123, and RFC 5322.
335339
// Each label must have at least one character and cannot start or end with dashes or periods.
336340
// Consecutive periods and adjacent period-hyphen combinations are also invalid.
337-
_validate_email_domain_label(
341+
_validate_email_label(
338342
&normalized_domain,
339343
"Invalid Domain: A {} cannot immediately follow the '@' symbol.",
340344
"Invalid Domain: A {} cannot appear at the end of the domain.",
@@ -351,14 +355,19 @@ impl EmailValidator {
351355
if label.len() > MAX_DNS_LABEL_LENGTH {
352356
return Err(PyValueError::new_err("The DNS label is too long"));
353357
}
354-
// if label.starts_with('-') || label.ends_with('-') {
355-
// return Err(PySyntaxError::new_err(
356-
// "Invalid Domain: cannot start or end with a hyphen.",
357-
// ));
358-
// }
358+
359359
if label.is_empty() {
360360
return Err(PySyntaxError::new_err("The DNS label cannot be empty"));
361361
}
362+
363+
// Check for two letters followed by two dashes
364+
if DNS_LABEL_REGEX.is_match(label.as_bytes())
365+
&& !label.to_lowercase().starts_with("xn--")
366+
{
367+
return Err(PySyntaxError::new_err(
368+
"Invalid Domain: Two letters followed by two dashes ('--') are not allowed immediately after the '@' sign or a period.",
369+
));
370+
}
362371
}
363372

364373
if self.deliverable_address {
@@ -369,11 +378,13 @@ impl EmailValidator {
369378
));
370379
}
371380

372-
// if !DOMAIN_NAME_REGEX.is_match(normalized_domain.as_bytes()) {
373-
// return Err(PySyntaxError::new_err(
374-
// "The part after the @-sign is not valid",
375-
// ));
376-
// }
381+
// TLDs must end with a letter.
382+
if !DOMAIN_NAME_REGEX.is_match(normalized_domain.as_bytes()) {
383+
return Err(PySyntaxError::new_err(
384+
// "The part after the @-sign is not valid. It is not within a valid top-level domain.",
385+
"Invalid domain: The part after the '@' sign does not belong to a valid top-level domain (TLD).",
386+
));
387+
}
377388
}
378389

379390
// Check for reserved and "special use" domains
@@ -443,12 +454,14 @@ fn _split_email(email: &str) -> Result<(String, String), PyErr> {
443454

444455
fn _validate_email_length(local_part: &str, domain: &str) -> Result<(), PyErr> {
445456
if local_part.len() + domain.len() + 1 > MAX_ADDRESS_LENGTH {
446-
return Err(PyValueError::new_err("The email is too long"));
457+
return Err(PyValueError::new_err(
458+
"Invalid Email Address: The email exceeds the maximum length (254 chars).",
459+
));
447460
}
448461
Ok(())
449462
}
450463

451-
fn _validate_email_domain_label(
464+
fn _validate_email_label(
452465
label: &str,
453466
beg_descr: &str,
454467
end_descr: &str,

0 commit comments

Comments
 (0)