You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
102 lines
3.2 KiB
102 lines
3.2 KiB
#ifndef PSL_CPP_H
|
|
#define PSL_CPP_H
|
|
|
|
#include <istream>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <utility>
|
|
|
|
namespace Url
|
|
{
|
|
|
|
/**
 * Find TLDs and PLDs of a hostname according to a PSL.
 *
 * The rule table is a single std::unordered_map member, so copying and
 * moving are left to the compiler-generated member-wise operations.
 */
struct PSL
{
    /**
     * Indicates that there is no TLD / PLD.
     */
    static const std::string not_found;

    /**
     * Read a PSL from an istream.
     *
     * Intentionally not marked explicit so that existing code relying on
     * the implicit conversion from std::istream& keeps compiling.
     */
    PSL(std::istream& stream);

    /**
     * Create a PSL whose rule table is empty.
     */
    PSL() = default;

    /**
     * Copy operations: member-wise copy of the rule table.
     */
    PSL(const PSL& other) = default;
    PSL& operator=(const PSL& other) = default;

    /**
     * Move operations: declared explicitly because user-declared copy
     * operations would otherwise suppress them, turning every move of a
     * PSL (e.g. assigning the result of fromPath) into a full copy of
     * the rule table.
     */
    PSL(PSL&& other) = default;
    PSL& operator=(PSL&& other) = default;

    /**
     * Read the provided path holding a set of PSL rules.
     */
    static PSL fromPath(const std::string& path);

    /**
     * Create a PSL object from a string.
     */
    static PSL fromString(const std::string& str);

    /**
     * Get just the TLD of the hostname.
     *
     * Works if the hostname is _either_ punycoded or unpunycoded, but not mixed. If
     * some segments have been appropriately punycoded and others not, it may return
     * a wrong answer. If a punycoded host is provided, a punycoded response is
     * returned. If an unpunycoded host is provided, an unpunycoded response is
     * returned.
     */
    std::string getTLD(const std::string& hostname) const;

    /**
     * Get just the PLD of the hostname.
     *
     * Works if the hostname is _either_ punycoded or unpunycoded, but not mixed. If
     * some segments have been appropriately punycoded and others not, it may return
     * a wrong answer. If a punycoded host is provided, a punycoded response is
     * returned. If an unpunycoded host is provided, an unpunycoded response is
     * returned.
     */
    std::string getPLD(const std::string& hostname) const;

    /**
     * Get the (TLD, PLD) of the hostname.
     *
     * Works if the hostname is _either_ punycoded or unpunycoded, but not mixed. If
     * some segments have been appropriately punycoded and others not, it may return
     * a wrong answer. If a punycoded host is provided, a punycoded response is
     * returned. If an unpunycoded host is provided, an unpunycoded response is
     * returned.
     */
    std::pair<std::string, std::string> getBoth(const std::string& hostname) const;

private:
    // Mapping of a string rule to its level
    std::unordered_map<std::string, size_t> levels;

    // Return the number of segments in a hostname
    size_t countSegments(const std::string& hostname) const;

    // Return the number of segments in the TLD of the provided hostname
    size_t getTLDLength(const std::string& hostname) const;

    // Return the last `segments` segments of a hostname
    std::string getLastSegments(const std::string& hostname, size_t segments) const;

    /**
     * Add the provided host with the provided priority, trimming characters off
     * the front, and adjusting the level by the provided number.
     */
    void add(std::string& host, int level_adjust, size_t trim);
};
|
|
|
|
}
|
|
|
|
#endif
|
|
|