// robots.h — declaration of Rep::Robots, a parsed robots.txt
// (per-user-agent rule groups plus advertised sitemap URLs).
#ifndef ROBOTS_CPP_H
#define ROBOTS_CPP_H

#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

#include "agent.h"
namespace Rep
|
|
{
|
|
|
|
class Robots
|
|
{
|
|
public:
|
|
typedef std::unordered_map<std::string, Agent> agent_map_t;
|
|
typedef std::vector<std::string> sitemaps_t;
|
|
|
|
/**
|
|
* Create a robots.txt from a utf-8-encoded string.
|
|
*/
|
|
Robots(const std::string& content);
|
|
|
|
/**
|
|
* Instantiate a Robots object.
|
|
*/
|
|
Robots(
|
|
const agent_map_t& agents,
|
|
const sitemaps_t& sitemaps)
|
|
: agents_(agents)
|
|
, sitemaps_(sitemaps)
|
|
, default_(agents_["*"]) {}
|
|
|
|
/**
|
|
* Get the sitemaps in this robots.txt
|
|
*/
|
|
const sitemaps_t& sitemaps() const { return sitemaps_; }
|
|
|
|
/**
|
|
* Get the agent with the corresponding name.
|
|
*/
|
|
const Agent& agent(const std::string& name) const;
|
|
|
|
/**
|
|
* Return true if agent is allowed to fetch the URL (either a
|
|
* full URL or a path).
|
|
*/
|
|
bool allowed(const std::string& path, const std::string& name) const;
|
|
|
|
std::string str() const;
|
|
|
|
/**
|
|
* Return the robots.txt URL corresponding to the provided URL.
|
|
*/
|
|
static std::string robotsUrl(const std::string& url);
|
|
|
|
private:
|
|
static void strip(std::string& string);
|
|
|
|
static bool getpair(
|
|
std::istringstream& stream, std::string& key, std::string& value);
|
|
|
|
agent_map_t agents_;
|
|
sitemaps_t sitemaps_;
|
|
Agent& default_;
|
|
};
|
|
}
#endif  // ROBOTS_CPP_H