'use strict';

// Regular-expression *source strings* used to assemble link/email matchers.
//
// Every export is a plain string (not a RegExp) so that callers can
// concatenate fragments and compile the final expression themselves.
// Names prefixed with `src_` are complete fragments. Names prefixed with
// `tpl_` contain the literal placeholder `%TLDS%`, which is presumably
// substituted by the consumer with an alternation of top-level domains
// before compilation - confirm against the caller.

// Use direct extract instead of `regenerate` to reduce browserified size
var src_Any = exports.src_Any = require('uc.micro/properties/Any/regex').source;
var src_Cc = exports.src_Cc = require('uc.micro/categories/Cc/regex').source;
var src_Z = exports.src_Z = require('uc.micro/categories/Z/regex').source;
var src_P = exports.src_P = require('uc.micro/categories/P/regex').source;

// \p{\Z\P\Cc} (white spaces + punctuation + control).
// NOTE: the format category (Cf) is not part of this alternation.
var src_ZPCc = exports.src_ZPCc = [ src_Z, src_P, src_Cc ].join('|');

// \p{\Z\Cc} (white spaces + control)
var src_ZCc = exports.src_ZCc = [ src_Z, src_Cc ].join('|');

// All possible word characters (everything without punctuation, spaces & controls)
// Defined via punctuation & spaces to save space
// Should be something like \p{\L\N\S\M} (\w but without `_`)
// `<` and `>` are excluded explicitly in addition to the Z/P/Cc classes.
var src_pseudo_letter = '(?:(?!>|<|' + src_ZPCc + ')' + src_Any + ')';

// The same as above but without [0-9]
// var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')';

////////////////////////////////////////////////////////////////////////////////

// Dotted-quad IPv4 address, each octet limited to 0..255.
var src_ip4 = exports.src_ip4 =
  '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';

// Optional `user:pass@` part.
// Prohibit [@/] in user/pass to avoid wrong domain fetch.
exports.src_auth = '(?:(?:(?!' + src_ZCc + '|[@/]).)+@)?';

// Optional `:port`; the alternation caps the numeric value at 65535.
var src_port = exports.src_port =
  '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?';

// Zero-width check placed right after a host (and port):
// - what follows must be end of input, `<`, `>`, or a space/punctuation/control
//   character;
// - but it must not be `-`, `_`, `:<digit>`, `.-`, or a `.` that is followed
//   by anything other than end of input or a space/punctuation/control char.
var src_host_terminator = exports.src_host_terminator =
  '(?=$|>|<|' + src_ZPCc + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + src_ZPCc + '))';

// Optional path / query / fragment. Either a `/`, `?` or `#` followed by one
// or more of the alternatives below, or a single bare `/`.
var src_path = exports.src_path =
  '(?:' +
    '[/?#]' +
      '(?:' +
        // any char that is not a space/control or one of the special chars
        // that get dedicated handling in the branches below
        '(?!' + src_ZCc + '|[()[\\]{}.,"\'?!\\-<>]).|' +
        // balanced bracket / quote pairs (no spaces or controls inside)
        '\\[(?:(?!' + src_ZCc + '|\\]).)*\\]|' +
        '\\((?:(?!' + src_ZCc + '|[)]).)*\\)|' +
        '\\{(?:(?!' + src_ZCc + '|[}]).)*\\}|' +
        '\\"(?:(?!' + src_ZCc + '|["]).)+\\"|' +
        "\\'(?:(?!" + src_ZCc + "|[']).)+\\'|" +
        // allow `I'm_king` if no pair found
        "\\'(?=" + src_pseudo_letter + ').|' +
        // github has ... in commit range links. Restrict to
        // - english
        // - percent-encoded
        // - parts of file path
        // until more examples found.
        '\\.{2,3}[a-zA-Z0-9%/]|' +
        // a single dot must be followed by a non-space, non-dot char
        '\\.(?!' + src_ZCc + '|[.]).|' +
        // `---` => long dash, terminate
        '\\-(?!--(?:[^-]|$))(?:-*)|' +
        // allow `,,,` in paths
        '\\,(?!' + src_ZCc + ').|' +
        // `!` / `?` must be followed by a non-space char that is not a repeat
        '\\!(?!' + src_ZCc + '|[!]).|' +
        '\\?(?!' + src_ZCc + '|[?]).' +
      ')+' +
    '|\\/' +
  ')?';

// Local part (before `@`) of an email address.
var src_email_name = exports.src_email_name =
  '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+';

// Punycode-style label: `xn--` followed by 1..59 of [a-z0-9-].
var src_xn = exports.src_xn =
  'xn--[a-z0-9\\-]{1,59}';

// More to read about domain names
// http://serverfault.com/questions/638260/

// Right-most label of a (non-fuzzy) host.
var src_domain_root = exports.src_domain_root =
  // Allow letters & digits (http://test1)
  '(?:' +
    src_xn +
    '|' +
    src_pseudo_letter + '{1,63}' +
  ')';

// Any other label of a host: an `xn--` label, a single pseudo-letter, or
// 2..63 chars that start and end with a pseudo-letter.
var src_domain = exports.src_domain =
  '(?:' +
    src_xn +
    '|' +
    '(?:' + src_pseudo_letter + ')' +
    '|' +
    // don't allow `--` in domain names, because:
    // - that can conflict with markdown — / –
    // - nobody use those anyway
    '(?:' + src_pseudo_letter + '(?:-(?!-)|' + src_pseudo_letter + '){0,61}' + src_pseudo_letter + ')' +
  ')';

// Full host: zero or more dot-terminated labels followed by the root label.
var src_host = exports.src_host =
  '(?:' +
    // Don't need IP check, because digits are already allowed in normal domain names
    //   src_ip4 +
    // '|' +
    '(?:(?:(?:' + src_domain + ')\\.)*' + src_domain_root + ')' +
  ')';

// Fuzzy host template: an IPv4 address, or one or more dot-terminated labels
// followed by the `%TLDS%` placeholder.
var tpl_host_fuzzy = exports.tpl_host_fuzzy =
  '(?:' +
    src_ip4 +
    '|' +
    '(?:(?:(?:' + src_domain + ')\\.)+(?:%TLDS%))' +
  ')';

// Same as `tpl_host_fuzzy`, but without the IPv4 alternative.
var tpl_host_no_ip_fuzzy = exports.tpl_host_no_ip_fuzzy =
  '(?:(?:(?:' + src_domain + ')\\.)+(?:%TLDS%))';

// "Strict" variants append the host terminator check; "port" variants put the
// optional port between the host and the terminator.
exports.src_host_strict =
  src_host + src_host_terminator;

var tpl_host_fuzzy_strict = exports.tpl_host_fuzzy_strict =
  tpl_host_fuzzy + src_host_terminator;

exports.src_host_port_strict =
  src_host + src_port + src_host_terminator;

var tpl_host_port_fuzzy_strict = exports.tpl_host_port_fuzzy_strict =
  tpl_host_fuzzy + src_port + src_host_terminator;

var tpl_host_port_no_ip_fuzzy_strict = exports.tpl_host_port_no_ip_fuzzy_strict =
  tpl_host_no_ip_fuzzy + src_port + src_host_terminator;

////////////////////////////////////////////////////////////////////////////////
// Main rules

// Rude test fuzzy links by host, for quick deny
exports.tpl_host_fuzzy_test =
  'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + src_ZPCc + '|>|$))';

// Group 1: what precedes the email (start of input, `<`, `>`, `(`, or a
// space/control char). Group 2: the email itself.
exports.tpl_email_fuzzy =
  '(^|<|>|\\(|' + src_ZCc + ')(' + src_email_name + '@' + tpl_host_fuzzy_strict + ')';

// Group 1: what precedes the link. Group 2: host, optional port and path.
exports.tpl_link_fuzzy =
  // Fuzzy link can't be prepended with .:/\-_@ and non punctuation.
  // but can start with > (markdown blockquote)
  '(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + src_ZPCc + '))' +
  '((?![$+<=>^`|])' + tpl_host_port_fuzzy_strict + src_path + ')';

// Same as `tpl_link_fuzzy`, but built on the host template without IPv4.
exports.tpl_link_no_ip_fuzzy =
  // Fuzzy link can't be prepended with .:/\-_@ and non punctuation.
  // but can start with > (markdown blockquote)
  '(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + src_ZPCc + '))' +
  '((?![$+<=>^`|])' + tpl_host_port_no_ip_fuzzy_strict + src_path + ')';