Extract normalised domain names with tldextract

This commit is contained in:
Ana Custura 2024-04-01 15:46:55 +01:00
parent 7d4233c9e8
commit dbe8a5d869

View file

@@ -1,4 +1,5 @@
import json import json
import tldextract
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Optional, List, Union, Any, Dict from typing import Optional, List, Union, Any, Dict
@@ -55,7 +56,8 @@ class Origin(AbstractConfiguration):
@property @property
def normalised_domain_name(self): def normalised_domain_name(self):
return self.domain_name.replace("www.", "") extracted_domain = tldextract.extract(self.domain_name)
return extracted_domain.registered_domain
def onion(self) -> Optional[str]: def onion(self) -> Optional[str]:
tld = extract(self.domain_name).registered_domain tld = extract(self.domain_name).registered_domain