boB Rudis
6 years ago
9 changed files with 3 additions and 6484 deletions
Before Width: | Height: | Size: 74 KiB After Width: | Height: | Size: 72 KiB |
@ -1,3 +1,3 @@ |
|||||
CXX_STD = CXX11 |
CXX_STD = CXX11 |
||||
PKG_CXXFLAGS = |
PKG_CXXFLAGS = |
||||
PKG_LIBS = -L. -liconv -lidn2 |
PKG_LIBS = -L. -lpsl |
@ -1,147 +0,0 @@ |
|||||
/* config.h. Generated from config.h.in by configure. */ |
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */ |
|
||||
|
|
||||
/* generate PSL data using libicu */ |
|
||||
/* #undef BUILTIN_GENERATOR_LIBICU */ |
|
||||
|
|
||||
/* generate PSL data using libidn */ |
|
||||
/* #undef BUILTIN_GENERATOR_LIBIDN */ |
|
||||
|
|
||||
/* generate PSL data using libidn2 */ |
|
||||
#define BUILTIN_GENERATOR_LIBIDN2 1 |
|
||||
|
|
||||
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
|
|
||||
systems. This function is required for `alloca.c' support on those systems. |
|
||||
*/ |
|
||||
/* #undef CRAY_STACKSEG_END */ |
|
||||
|
|
||||
/* Define to 1 if using `alloca.c'. */ |
|
||||
/* #undef C_ALLOCA */ |
|
||||
|
|
||||
/* Define to 1 if translation of program messages to the user's native
|
|
||||
language is requested. */ |
|
||||
/* #undef ENABLE_NLS */ |
|
||||
|
|
||||
/* Define to 1 if you have `alloca', as a function or macro. */ |
|
||||
#define HAVE_ALLOCA 1 |
|
||||
|
|
||||
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
|
|
||||
*/ |
|
||||
#define HAVE_ALLOCA_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the MacOS X function CFLocaleCopyCurrent in the
|
|
||||
CoreFoundation framework. */ |
|
||||
#define HAVE_CFLOCALECOPYCURRENT 1 |
|
||||
|
|
||||
/* Define to 1 if you have the MacOS X function CFPreferencesCopyAppValue in
|
|
||||
the CoreFoundation framework. */ |
|
||||
#define HAVE_CFPREFERENCESCOPYAPPVALUE 1 |
|
||||
|
|
||||
/* Define to 1 if you have the `clock_gettime' function. */ |
|
||||
#define HAVE_CLOCK_GETTIME 1 |
|
||||
|
|
||||
/* Define if the GNU dcgettext() function is already present or preinstalled.
|
|
||||
*/ |
|
||||
/* #undef HAVE_DCGETTEXT */ |
|
||||
|
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */ |
|
||||
#define HAVE_DLFCN_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the `fmemopen' function. */ |
|
||||
#define HAVE_FMEMOPEN 1 |
|
||||
|
|
||||
/* Define if the GNU gettext() function is already present or preinstalled. */ |
|
||||
/* #undef HAVE_GETTEXT */ |
|
||||
|
|
||||
/* Define if you have the iconv() function and it works. */ |
|
||||
#define HAVE_ICONV 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */ |
|
||||
#define HAVE_INTTYPES_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <memory.h> header file. */ |
|
||||
#define HAVE_MEMORY_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the `nl_langinfo' function. */ |
|
||||
#define HAVE_NL_LANGINFO 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <stdint.h> header file. */ |
|
||||
#define HAVE_STDINT_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */ |
|
||||
#define HAVE_STDLIB_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <strings.h> header file. */ |
|
||||
#define HAVE_STRINGS_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <string.h> header file. */ |
|
||||
#define HAVE_STRING_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the `strndup' function. */ |
|
||||
#define HAVE_STRNDUP 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */ |
|
||||
#define HAVE_SYS_STAT_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */ |
|
||||
#define HAVE_SYS_TYPES_H 1 |
|
||||
|
|
||||
/* Define to 1 if you have the <unistd.h> header file. */ |
|
||||
#define HAVE_UNISTD_H 1 |
|
||||
|
|
||||
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
|
||||
declarations. */ |
|
||||
#define HAVE_VISIBILITY 1 |
|
||||
|
|
||||
/* Define as const if the declaration of iconv() needs const. */ |
|
||||
#define ICONV_CONST |
|
||||
|
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */ |
|
||||
#define LT_OBJDIR ".libs/" |
|
||||
|
|
||||
/* Define to the address where bug reports for this package should be sent. */ |
|
||||
#define PACKAGE_BUGREPORT "tim.ruehsen@gmx.de" |
|
||||
|
|
||||
/* Define to the full name of this package. */ |
|
||||
#define PACKAGE_NAME "libpsl" |
|
||||
|
|
||||
/* Define to the full name and version of this package. */ |
|
||||
#define PACKAGE_STRING "libpsl 0.20.2" |
|
||||
|
|
||||
/* Define to the one symbol short name of this package. */ |
|
||||
#define PACKAGE_TARNAME "libpsl" |
|
||||
|
|
||||
/* Define to the home page for this package. */ |
|
||||
#define PACKAGE_URL "https://github.com/rockdaboot/libpsl"
|
|
||||
|
|
||||
/* Define to the version of this package. */ |
|
||||
#define PACKAGE_VERSION "0.20.2" |
|
||||
|
|
||||
/* If using the C implementation of alloca, define if you know the
|
|
||||
direction of stack growth for your system; otherwise it will be |
|
||||
automatically deduced at runtime. |
|
||||
STACK_DIRECTION > 0 => grows toward higher addresses |
|
||||
STACK_DIRECTION < 0 => grows toward lower addresses |
|
||||
STACK_DIRECTION = 0 => direction of growth unknown */ |
|
||||
/* #undef STACK_DIRECTION */ |
|
||||
|
|
||||
/* Define to 1 if you have the ANSI C header files. */ |
|
||||
#define STDC_HEADERS 1 |
|
||||
|
|
||||
/* generate PSL data using libicu */ |
|
||||
/* #undef WITH_LIBICU */ |
|
||||
|
|
||||
/* generate PSL data using libidn */ |
|
||||
/* #undef WITH_LIBIDN */ |
|
||||
|
|
||||
/* generate PSL data using libidn2 */ |
|
||||
#define WITH_LIBIDN2 1 |
|
||||
|
|
||||
/* Define to `__inline__' or `__inline' if that's what the C compiler
|
|
||||
calls it, or to nothing if 'inline' is not supported under any name. */ |
|
||||
#ifndef __cplusplus |
|
||||
/* #undef inline */ |
|
||||
#endif |
|
||||
|
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */ |
|
||||
/* #undef size_t */ |
|
@ -1,212 +0,0 @@ |
|||||
/*
|
|
||||
* Copyright(c) 2014-2018 Tim Ruehsen |
|
||||
* |
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a |
|
||||
* copy of this software and associated documentation files (the "Software"), |
|
||||
* to deal in the Software without restriction, including without limitation |
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
||||
* and/or sell copies of the Software, and to permit persons to whom the |
|
||||
* Software is furnished to do so, subject to the following conditions: |
|
||||
* |
|
||||
* The above copyright notice and this permission notice shall be included in |
|
||||
* all copies or substantial portions of the Software. |
|
||||
* |
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|
||||
* DEALINGS IN THE SOFTWARE. |
|
||||
* |
|
||||
* This file is part of libpsl. |
|
||||
* |
|
||||
* Header file for libpsl library routines |
|
||||
* |
|
||||
* Changelog |
|
||||
* 20.03.2014 Tim Ruehsen created |
|
||||
* |
|
||||
*/ |
|
||||
#ifndef LIBPSL_LIBPSL_H
#define LIBPSL_LIBPSL_H

/*
 * Standard headers are included before any extern "C" block is opened, so
 * they are never pulled in under a C linkage specification when this header
 * is compiled as C++.
 */
#include <stdio.h>
#include <time.h>

/* Library version, as a string and as individual components. */
#define PSL_VERSION "0.20.2"
#define PSL_VERSION_MAJOR 0
#define PSL_VERSION_MINOR 20
#define PSL_VERSION_PATCH 2
/* Hex encoding of the version: one byte each for major, minor and patch. */
#define PSL_VERSION_NUMBER 0x001402

/*
 * PSL_API marks the public entry points. It expands to a visibility or
 * dllexport/dllimport attribute depending on the toolchain and on whether
 * the library itself is being built (BUILDING_PSL); callers may predefine
 * PSL_API to override the choice.
 */
#ifndef PSL_API
#if defined BUILDING_PSL && HAVE_VISIBILITY
#  define PSL_API __attribute__ ((__visibility__("default")))
#elif defined BUILDING_PSL && defined _MSC_VER && !defined PSL_STATIC
#  define PSL_API __declspec(dllexport)
#elif defined _MSC_VER && !defined PSL_STATIC
#  define PSL_API __declspec(dllimport)
#else
#  define PSL_API
#endif
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* types for psl_is_public_suffix2() */
#define PSL_TYPE_ICANN        (1<<0)
#define PSL_TYPE_PRIVATE      (1<<1)
#define PSL_TYPE_NO_STAR_RULE (1<<2)
#define PSL_TYPE_ANY          (PSL_TYPE_ICANN | PSL_TYPE_PRIVATE)

/**
 * psl_error_t:
 * @PSL_SUCCESS: Successful return.
 * @PSL_ERR_INVALID_ARG: Invalid argument.
 * @PSL_ERR_CONVERTER: Failed to open libicu utf-16 converter.
 * @PSL_ERR_TO_UTF16: Failed to convert to utf-16.
 * @PSL_ERR_TO_LOWER: Failed to convert utf-16 to lowercase.
 * @PSL_ERR_TO_UTF8: Failed to convert utf-16 to utf-8.
 * @PSL_ERR_NO_MEM: Failed to allocate memory.
 *
 * Return codes for PSL functions.
 * Negative return codes mean failure.
 * Positive values are reserved for non-error return codes.
 */
typedef enum {
	PSL_SUCCESS = 0,
	PSL_ERR_INVALID_ARG = -1,
	PSL_ERR_CONVERTER = -2, /* failed to open libicu utf-16 converter */
	PSL_ERR_TO_UTF16 = -3, /* failed to convert to utf-16 */
	PSL_ERR_TO_LOWER = -4, /* failed to convert utf-16 to lowercase */
	PSL_ERR_TO_UTF8 = -5, /* failed to convert utf-16 to utf-8 */
	PSL_ERR_NO_MEM = -6 /* failed to allocate memory */
} psl_error_t;

/* Opaque PSL context; the structure is defined by the library implementation. */
typedef struct _psl_ctx_st psl_ctx_t;

/* frees PSL context */
PSL_API
void
	psl_free(psl_ctx_t *psl);

/* frees memory allocated by libpsl routines */
PSL_API
void
	psl_free_string(char *str);

/* loads PSL data from file */
PSL_API
psl_ctx_t *
	psl_load_file(const char *fname);

/* loads PSL data from FILE pointer */
PSL_API
psl_ctx_t *
	psl_load_fp(FILE *fp);

/* retrieves builtin PSL data */
PSL_API
const psl_ctx_t *
	psl_builtin(void);

/* retrieves most recent PSL data */
PSL_API
psl_ctx_t *
	psl_latest(const char *fname);

/* checks whether domain is a public suffix or not */
PSL_API
int
	psl_is_public_suffix(const psl_ctx_t *psl, const char *domain);

/* checks whether domain is a public suffix regarding the type or not */
PSL_API
int
	psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type);

/* checks whether cookie_domain is acceptable for hostname or not */
PSL_API
int
	psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain);

/* returns the longest not registrable domain within 'domain' or NULL if none found */
PSL_API
const char *
	psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain);

/* returns the shortest possible registrable domain part or NULL if domain is not registrable at all */
PSL_API
const char *
	psl_registrable_domain(const psl_ctx_t *psl, const char *domain);

/* convert a string into lowercase UTF-8 */
PSL_API
psl_error_t
	psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower);

/* does not include exceptions */
PSL_API
int
	psl_suffix_count(const psl_ctx_t *psl);

/* just counts exceptions */
PSL_API
int
	psl_suffix_exception_count(const psl_ctx_t *psl);

/* just counts wildcards */
PSL_API
int
	psl_suffix_wildcard_count(const psl_ctx_t *psl);

/* returns mtime of PSL source file */
PSL_API
time_t
	psl_builtin_file_time(void);

/* returns SHA1 checksum (hex-encoded, lowercase) of PSL source file */
PSL_API
const char *
	psl_builtin_sha1sum(void);

/* returns file name of PSL source file */
PSL_API
const char *
	psl_builtin_filename(void);

/* returns name of distribution PSL data file */
PSL_API
const char *
	psl_dist_filename(void);

/* returns library version string */
PSL_API
const char *
	psl_get_version(void);

/* checks library version number */
PSL_API
int
	psl_check_version_number(int version);

/* returns whether the built-in data is outdated or not */
PSL_API
int
	psl_builtin_outdated(void);

#ifdef __cplusplus
}
#endif

#endif /* LIBPSL_LIBPSL_H */
|
@ -1,279 +0,0 @@ |
|||||
/* Copyright 2015-2016 The Chromium Authors. All rights reserved.
|
|
||||
* Use of this source code is governed by a BSD-style license that can be |
|
||||
* found in the LICENSE.chromium file. |
|
||||
* |
|
||||
* Converted to C89 2015 by Tim Rühsen |
|
||||
*/ |
|
||||
|
|
||||
#include <stddef.h> |
|
||||
|
|
||||
/*
 * _GCC_VERSION_AT_LEAST(major, minor) evaluates to non-zero when compiling
 * with a GNU-compatible compiler of at least the given version, and to 0 on
 * any compiler that does not define __GNUC__ / __GNUC_MINOR__.
 */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#  define _GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
#  define _GCC_VERSION_AT_LEAST(major, minor) 0
#endif

/*
 * CHECK_LT(a, b): make the enclosing function return 0 unless a < b.
 *
 * Both arguments are parenthesized and the statement is wrapped in
 * do { } while (0) so the macro behaves like a single statement: it is safe
 * in an unbraced if/else and with arguments that contain low-precedence
 * operators. Use it with a trailing semicolon.
 */
#define CHECK_LT(a, b) do { if ((a) >= (b)) return 0; } while (0)
|
||||
|
|
||||
/*
 * Sequence length indexed by the high nibble of a byte. Only the nibbles
 * 0xC-0xF carry a non-zero length; every other nibble maps to 0.
 */
static const char multibyte_length_table[16] = {
	/* 0x00-0x7F */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80-0xBF */ 0, 0, 0, 0,
	/* 0xC0-0xDF */ 2, 2,
	/* 0xE0-0xEF */ 3,
	/* 0xF0-0xFF */ 4,
};

/*
 * Return the length of the multibyte sequence introduced by byte c.
 * The result is 0 when c is below 0xC0, i.e. when it does not start a
 * multibyte sequence.
 */
static int GetMultibyteLength(char c) {
	const unsigned char byte = (unsigned char)c;
	const unsigned int high_nibble = byte >> 4;

	return multibyte_length_table[high_nibble];
}
|
||||
|
|
||||
/*
 * Advance the graph position by one byte and step the key accordingly.
 *
 * Outside multibyte mode (*multibyte_start is null) the key either advances
 * by one byte, or - when it points at a multibyte lead byte - stays put and
 * multibyte mode is entered by recording the key position in
 * *multibyte_start. Inside multibyte mode the key advances by one byte and
 * the mode is left once the whole sequence has been consumed.
 */
static void NextPos(const unsigned char** pos,
	const char** key,
	const char** multibyte_start)
{
	++*pos;

	if (!*multibyte_start) {
		if (GetMultibyteLength(**key)) {
			/* The multibyte prefix was matched in the dafsa; the
			 * sequence content is matched from the next round on. */
			*multibyte_start = *key;
		} else {
			/* A single byte character was matched. */
			++*key;
		}
		return;
	}

	/* Step to the next byte of the current multibyte sequence. */
	++*key;

	/* Leave multibyte mode after the last byte of the sequence. */
	if (*key - *multibyte_start == GetMultibyteLength(**multibyte_start))
		*multibyte_start = 0;
}
|
||||
|
|
||||
/*
 * Read the next offset from the offset list at *pos.
 *
 * The decoded value is added to *offset. Bits 0x60 of the first byte select
 * the encoding: 0x60 is a three byte offset, 0x40 a two byte offset, and
 * anything else a one byte offset held in the low six bits. When bit 0x80 of
 * the first byte is set, *pos is moved to end so that the next call returns
 * 0; otherwise *pos is advanced past the bytes just read.
 *
 * Returns 1 if an offset could be read, 0 otherwise (list exhausted or fewer
 * than three bytes left before end). On a 0 return neither *pos nor *offset
 * is modified.
 */
static int GetNextOffset(const unsigned char** pos,
	const unsigned char* end,
	const unsigned char** offset)
{
	const unsigned char* cur = *pos;
	size_t bytes_consumed;

	if (cur == end)
		return 0;

	/* When reading an offset the byte array must always contain at least
	 * three more bytes to consume. First the offset to read, then a node
	 * to skip over and finally a destination node. No object can be smaller
	 * than one byte.
	 *
	 * The remaining length is compared instead of computing cur + 2, which
	 * would form a pointer beyond one-past-the-end of the array when only
	 * a single byte is left. */
	if (end - cur < 3)
		return 0;

	switch (cur[0] & 0x60) {
	case 0x60: /* Read three byte offset */
		*offset += ((cur[0] & 0x1F) << 16) | (cur[1] << 8) | cur[2];
		bytes_consumed = 3;
		break;
	case 0x40: /* Read two byte offset */
		*offset += ((cur[0] & 0x1F) << 8) | cur[1];
		bytes_consumed = 2;
		break;
	default: /* Read one byte offset */
		*offset += cur[0] & 0x3F;
		bytes_consumed = 1;
		break;
	}

	if ((cur[0] & 0x80) != 0) {
		/* This entry is flagged as the final one in the list. */
		*pos = end;
	} else {
		*pos = cur + bytes_consumed;
	}

	return 1;
}
|
||||
|
|
||||
/*
 * Tell whether the byte at offset is the last one in its label, which is
 * signalled by bit 0x80 being set. Returns 0 when offset is not below end.
 */
static int IsEOL(const unsigned char* offset, const unsigned char* end)
{
	if (offset >= end)
		return 0;

	return (*offset & 0x80) ? 1 : 0;
}
|
||||
|
|
||||
/*
 * Compare one graph byte (matcher) against the first byte of key.
 * The caller is responsible for any range check on the graph side.
 *
 * In multibyte mode (multibyte_start is non-null) the lead byte of the
 * sequence must equal matcher ^ 0x80 and each following byte must equal
 * matcher ^ 0xC0. Outside multibyte mode, a key byte that starts a multibyte
 * sequence only matches the special marker 0x1F; any other key byte must
 * equal matcher exactly.
 */
static int IsMatchUnchecked(const unsigned char matcher,
	const char* key,
	const char* multibyte_start)
{
	const unsigned char key_byte = (const unsigned char)*key;

	if (multibyte_start) {
		/* The lead byte is stored with one mask, the following bytes
		 * of the sequence with another. */
		const int mask = (multibyte_start == key) ? 0x80 : 0xC0;

		return (matcher ^ mask) == key_byte;
	}

	/* A multibyte lead byte seen outside multibyte mode has to be
	 * announced in the dafsa by the mode switch marker. */
	if (GetMultibyteLength(*key))
		return matcher == 0x1F;

	/* Plain single byte character. */
	return matcher == key_byte;
}
|
||||
|
|
||||
/*
 * Range-checked match of the graph byte at offset against the first byte of
 * key, for graph bytes that are not the last in their label (the byte is
 * compared as stored). Returns 0 when offset is not below end.
 */
static int IsMatch(const unsigned char* offset,
	const unsigned char* end,
	const char* key,
	const char* multibyte_start)
{
	if (offset >= end)
		return 0;

	return IsMatchUnchecked(*offset, key, multibyte_start);
}
|
||||
|
|
||||
/*
 * Range-checked match of the graph byte at offset against the first byte of
 * key, for graph bytes that are the last in their label: bit 0x80 is toggled
 * before the comparison. Returns 0 when offset is not below end.
 */
static int IsEndCharMatch(const unsigned char* offset,
	const unsigned char* end,
	const char* key,
	const char* multibyte_start)
{
	unsigned char unflagged;

	if (offset >= end)
		return 0;

	unflagged = (unsigned char)(*offset ^ 0x80);
	return IsMatchUnchecked(unflagged, key, multibyte_start);
}
|
||||
|
|
||||
/*
 * Read the return value stored at offset.
 *
 * A return value node has its top three bits equal to 100 (binary); its low
 * four bits are written to *return_value. Nothing is read while a multibyte
 * sequence is still open (multibyte_start non-null) or when offset is not
 * below end.
 *
 * Returns 1 if a value was stored in *return_value, 0 otherwise (in which
 * case *return_value is left untouched).
 */
static int GetReturnValue(const unsigned char* offset,
	const unsigned char* end,
	const char* multibyte_start,
	int* return_value)
{
	if (offset >= end)
		return 0;

	if (multibyte_start || (*offset & 0xE0) != 0x80)
		return 0;

	*return_value = *offset & 0x0F;
	return 1;
}
|
||||
|
|
||||
/*
 * Look up the string |key| of length |key_length| in a fixed set of strings
 * encoded as a DAFSA (Deterministic Acyclic Finite State Automaton). The
 * byte array is supplied via |graph| and |length|; it is expected to have
 * been produced by the psl-make-dafsa script.
 *
 * Returns -1 when the key is not in the set. Otherwise returns the low four
 * bits of the matching return value node (the original documentation
 * describes these as kDafsaFound or a bitmap of kDafsaExceptionRule,
 * kDafsaWildcardRule and kDafsaPrivateRule).
 */

/* prototype to skip warning with -Wmissing-prototypes */
int LookupStringInFixedSet(const unsigned char*, size_t, const char*, size_t);

int LookupStringInFixedSet(const unsigned char* graph,
	size_t length,
	const char* key,
	size_t key_length)
{
	const unsigned char* const limit = graph + length;
	const char* const key_limit = key + key_length;
	const unsigned char* cursor = graph; /* position in the current offset list */
	const unsigned char* node = graph;   /* child node currently examined */
	const char* mb_start = 0;            /* non-null while inside a multibyte sequence */

	while (GetNextOffset(&cursor, limit, &node)) {
		/* A child node has one of these shapes:
		 *   char <char>+ end_char offsets
		 *   char <char>+ return value
		 *   char end_char offsets
		 *   char return value
		 *   end_char offsets
		 *   return_value
		 */
		int consumed_any = 0;

		if (key != key_limit && !IsEOL(node, limit)) {
			/* The leading <char> differs: this child cannot match,
			 * move on to the next sibling. */
			if (!IsMatch(node, limit, key, mb_start))
				continue;

			consumed_any = 1;

			/* Consume every further <char> node; once the first one
			 * matched, a later mismatch means the key is absent. */
			for (NextPos(&node, &key, &mb_start);
			     !IsEOL(node, limit) && key != key_limit;
			     NextPos(&node, &key, &mb_start)) {
				if (!IsMatch(node, limit, key, mb_start))
					return -1;
			}
		}

		/* What remains is either "end_char offsets" or "return_value". */
		if (key == key_limit) {
			int value;

			if (GetReturnValue(node, limit, mb_start, &value))
				return value;
		} else if (IsEndCharMatch(node, limit, key, mb_start)) {
			NextPos(&node, &key, &mb_start);
			cursor = node; /* Dive into child */
			continue;
		}

		/* No match here. If characters were already consumed the
		 * failure is terminal; otherwise try the next sibling. */
		if (consumed_any)
			return -1;
	}

	return -1; /* No match */
}
|
||||
|
|
||||
/* prototype to skip warning with -Wmissing-prototypes */
int GetUtfMode(const unsigned char *graph, size_t length);

/*
 * Returns 1 when the graph is non-empty and its final byte is below 0x80,
 * and 0 otherwise.
 * NOTE(review): presumably the trailing byte is a flag marking a UTF-8 mode
 * DAFSA - confirm against the generator script.
 */
int GetUtfMode(const unsigned char *graph, size_t length)
{
	if (length == 0)
		return 0;

	return graph[length - 1] < 0x80 ? 1 : 0;
}
|
File diff suppressed because it is too large
File diff suppressed because it is too large
Loading…
Reference in new issue