You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
163 lines
5.3 KiB
163 lines
5.3 KiB
4 weeks ago
|
'use strict';
|
||
|
|
||
|
// Use direct extract instead of `regenerate` to reduce browserified size
|
||
|
var src_Any = exports.src_Any = require('uc.micro/properties/Any/regex').source;
|
||
|
var src_Cc = exports.src_Cc = require('uc.micro/categories/Cc/regex').source;
|
||
|
var src_Z = exports.src_Z = require('uc.micro/categories/Z/regex').source;
|
||
|
var src_P = exports.src_P = require('uc.micro/categories/P/regex').source;
|
||
|
|
||
|
// \p{\Z\P\Cc} (white spaces + punctuation + control)
|
||
|
// Alternation of the Z, P and Cc sources; kept locally and re-exported.
var src_ZPCc = [ src_Z, src_P, src_Cc ].join('|');
exports.src_ZPCc = src_ZPCc;
|
||
|
|
||
|
// \p{\Z\Cc} (white spaces + control)
|
||
|
// Alternation of the Z and Cc sources; kept locally and re-exported.
var src_ZCc = [ src_Z, src_Cc ].join('|');
exports.src_ZCc = src_ZCc;
|
||
|
|
||
|
// All possible word characters (everything without punctuation, spaces & controls)
|
||
|
// Defined via punctuation & spaces to save space
|
||
|
// Should be something like \p{\L\N\S\M} (\w but without `_`)
|
||
|
// One "word" character: anything matched by src_Any that is not `>`, `<`
// or something matched by src_ZPCc (enforced with a negative lookahead).
var src_pseudo_letter =
  '(?:' +
    '(?!>|<|' + src_ZPCc + ')' +
    src_Any +
  ')';
|
||
|
// The same as above but without [0-9]
|
||
|
// var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')';
|
||
|
|
||
|
////////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
// Dotted-quad IPv4 address: three `octet.` groups followed by a final octet.
// Each octet alternative set (25[0-5] | 2[0-4][0-9] | [01]?[0-9][0-9]?) limits
// the value to 0..255. Note the octet groups are capturing groups.
var src_ip4 = exports.src_ip4 =
  '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';
|
||
|
|
||
|
// Prohibit [@/] in user/pass to avoid wrong domain fetch.
|
||
|
// Optional `user:pass@` prefix: one or more characters that are neither
// matched by src_ZCc nor one of `@` `/`, followed by a literal `@`.
exports.src_auth =
  '(?:' +
    '(?:(?!' + src_ZCc + '|[@/]).)+' +
    '@' +
  ')?';
|
||
|
|
||
|
// Optional `:port` suffix. The first alternative covers 60000..65535 digit by
// digit; the second (`[1-5]?\d{1,4}`) covers the shorter / smaller numbers,
// so nothing above 65535 is accepted.
var src_port = exports.src_port =
  '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?';
|
||
|
|
||
|
// Zero-width check applied right after a host name.
// - Positive lookahead: the host must be followed by end of input, `>`, `<`
//   or something matched by src_ZPCc.
// - Negative lookahead: reject when what follows is `-`, `_`, `:` + digit,
//   `.-`, or a `.` that is not itself followed by end of input / src_ZPCc.
var src_host_terminator = exports.src_host_terminator =
  '(?=$|>|<|' + src_ZPCc + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + src_ZPCc + '))';
|
||
|
|
||
|
// Optional path / query / fragment part of a link.
// Starts with one of `/`, `?`, `#`, followed by one or more "path items"
// (the alternatives below), or is just a single `/`.
var src_path = exports.src_path =
  '(?:' +
    '[/?#]' +
      '(?:' +
        // any char except src_ZCc and the special punctuation handled below
        '(?!' + src_ZCc + '|[()[\\]{}.,"\'?!\\-<>]).|' +
        // balanced [...], (...), {...} - content may be empty
        '\\[(?:(?!' + src_ZCc + '|\\]).)*\\]|' +
        '\\((?:(?!' + src_ZCc + '|[)]).)*\\)|' +
        '\\{(?:(?!' + src_ZCc + '|[}]).)*\\}|' +
        // paired double / single quotes - content must be non-empty
        '\\"(?:(?!' + src_ZCc + '|["]).)+\\"|' +
        "\\'(?:(?!" + src_ZCc + "|[']).)+\\'|" +
        "\\'(?=" + src_pseudo_letter + ').|' +  // allow `I'm_king` if no pair found
        '\\.{2,3}[a-zA-Z0-9%/]|' + // github has ... in commit range links. Restrict to
                                   // - english
                                   // - percent-encoded
                                   // - parts of file path
                                   // until more examples found.
        // single dot, only when followed by something other than src_ZCc or another dot
        '\\.(?!' + src_ZCc + '|[.]).|' +
        '\\-(?!--(?:[^-]|$))(?:-*)|' + // `---` => long dash, terminate
        '\\,(?!' + src_ZCc + ').|' +      // allow `,,,` in paths
        // `!` / `?` only when not followed by src_ZCc or by the same char again
        '\\!(?!' + src_ZCc + '|[!]).|' +
        '\\?(?!' + src_ZCc + '|[?]).' +
      ')+' +
    '|\\/' +
  ')?';
|
||
|
|
||
|
// Local part of an email address (the text before `@`): one or more of
// `-` `;` `:` `&` `=` `+` `$` `,` `"` `.` `_`, ASCII letters or digits.
var src_email_name = exports.src_email_name =
  '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+';
|
||
|
|
||
|
// Label starting with the literal prefix `xn--`, followed by 1 to 59
// characters from [a-z0-9-] (presumably punycode-encoded IDN labels).
var src_xn = exports.src_xn =
  'xn--[a-z0-9\\-]{1,59}';
|
||
|
|
||
|
// More to read about domain names
|
||
|
// http://serverfault.com/questions/638260/
|
||
|
|
||
|
// Last (root) label of a host name: either an `xn--` label or 1 to 63
// pseudo letters.
var src_domain_root = exports.src_domain_root =
  // Allow letters & digits (http://test1)
  '(?:' +
    src_xn +
    '|' +
    src_pseudo_letter + '{1,63}' +
  ')';
|
||
|
|
||
|
// A single non-root label of a host name. One of:
// - an `xn--` label,
// - exactly one pseudo letter,
// - a pseudo letter, then up to 61 items (pseudo letter or a single dash),
//   then a closing pseudo letter.
var src_domain = exports.src_domain =
  '(?:' +
    src_xn +
    '|' +
    '(?:' + src_pseudo_letter + ')' +
    '|' +
    // don't allow `--` in domain names, because:
    // - that can conflict with markdown &mdash; / &ndash;
    // - nobody uses those anyway
    '(?:' + src_pseudo_letter + '(?:-(?!-)|' + src_pseudo_letter + '){0,61}' + src_pseudo_letter + ')' +
  ')';
|
||
|
|
||
|
// Full host name: zero or more `label.` parts followed by a root label.
var src_host = exports.src_host =
  '(?:' +
    // Don't need IP check, because digits are already allowed in normal domain names
    //   src_ip4 +
    // '|' +
    '(?:(?:(?:' + src_domain + ')\\.)*' + src_domain_root + ')' +
  ')';
|
||
|
|
||
|
// Template for a host without a schema: either an IPv4 address, or one or
// more `label.` parts followed by a TLD.
// `%TLDS%` is a placeholder inside the string (presumably replaced with the
// list of known TLDs by the consumer of this template - confirm at call site).
var tpl_host_fuzzy = exports.tpl_host_fuzzy =
  '(?:' +
    src_ip4 +
  '|' +
    '(?:(?:(?:' + src_domain + ')\\.)+(?:%TLDS%))' +
  ')';
|
||
|
|
||
|
// Same as the domain branch of tpl_host_fuzzy, without the IPv4 alternative:
// one or more `label.` parts followed by the `%TLDS%` placeholder.
var tpl_host_no_ip_fuzzy = exports.tpl_host_no_ip_fuzzy =
  '(?:(?:(?:' + src_domain + ')\\.)+(?:%TLDS%))';
|
||
|
|
||
|
// Host name that must be followed by a valid host terminator.
exports.src_host_strict =
  src_host + src_host_terminator;
|
||
|
|
||
|
// Fuzzy host template (IPv4 or domain + TLD) followed by the host terminator.
var tpl_host_fuzzy_strict = exports.tpl_host_fuzzy_strict =
  tpl_host_fuzzy + src_host_terminator;
|
||
|
|
||
|
// Host name with an optional port, followed by the host terminator.
exports.src_host_port_strict =
  src_host + src_port + src_host_terminator;
|
||
|
|
||
|
// Fuzzy host template with an optional port, followed by the host terminator.
var tpl_host_port_fuzzy_strict = exports.tpl_host_port_fuzzy_strict =
  tpl_host_fuzzy + src_port + src_host_terminator;
|
||
|
|
||
|
// Fuzzy host template without the IPv4 alternative, with an optional port,
// followed by the host terminator.
var tpl_host_port_no_ip_fuzzy_strict = exports.tpl_host_port_no_ip_fuzzy_strict =
  tpl_host_no_ip_fuzzy + src_port + src_host_terminator;
|
||
|
|
||
|
|
||
|
////////////////////////////////////////////////////////////////////////////////
|
||
|
// Main rules
|
||
|
|
||
|
// Crude test for fuzzy links by host, used for quick rejection
|
||
|
// Cheap pre-check template. Matches when the text contains any of:
// - `localhost`
// - `www.`
// - `.` + 1-3 digits + `.`
// - `.` + a TLD (`%TLDS%` placeholder) followed by src_ZPCc, `>` or end of input
exports.tpl_host_fuzzy_test =
  'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + src_ZPCc + '|>|$))';
|
||
|
|
||
|
// Template for an email address without `mailto:`.
// Group 1: what precedes the address - start of input, `<`, `>`, `(` or
//          something matched by src_ZCc.
// Group 2: the address itself - local part, `@`, strict fuzzy host.
exports.tpl_email_fuzzy =
  '(^|<|>|\\(|' + src_ZCc + ')(' + src_email_name + '@' + tpl_host_fuzzy_strict + ')';
|
||
|
|
||
|
// Template for a link without a schema (host may be an IPv4 address).
// Group 1: what precedes the link. It is either start of input, or a single
//          character that is one of $+<=>^`| or matched by src_ZPCc, and is
//          not one of `.` `:` `/` `-` `_` `@`.
//          So a link can follow `>` (markdown blockquote).
// Group 2: the link itself - host, optional port, optional path. It must not
//          start with one of $+<=>^`|.
exports.tpl_link_fuzzy =
  '(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + src_ZPCc + '))' +
  '((?![$+<=>^`|])' + tpl_host_port_fuzzy_strict + src_path + ')';
|
||
|
|
||
|
// Template for a link without a schema, where the host cannot be an IPv4
// address.
// Group 1: what precedes the link. It is either start of input, or a single
//          character that is one of $+<=>^`| or matched by src_ZPCc, and is
//          not one of `.` `:` `/` `-` `_` `@`.
//          So a link can follow `>` (markdown blockquote).
// Group 2: the link itself - host, optional port, optional path. It must not
//          start with one of $+<=>^`|.
exports.tpl_link_no_ip_fuzzy =
  '(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + src_ZPCc + '))' +
  '((?![$+<=>^`|])' + tpl_host_port_no_ip_fuzzy_strict + src_path + ')';
|