Sqlserver
 sql >> база данни >  >> RDS >> Sqlserver

T-SQL алгоритъм за кодиране на опасни HTML символи като препратки към HTML символни обекти

Кодиране на пет специални знака с рекурсивен CTE:

DECLARE 
  @unsafe NVARCHAR(MAX),
  @safe   NVARCHAR(MAX) 

--
-- Create the unsafe html string
-- 
SET @unsafe = N'html''s encoding "method" is <= or >= & 1234 ' + NCHAR(129) 
--
-- Use a recursive CTE to iterate through each character in the string
-- 
;WITH cte AS 
(
  -- 
  -- The first row will contain the original
  -- string, an empty string to be used to 
  -- build the "safe" string, and a position
  -- column to mark the character position
  -- of the loop
  -- 
  SELECT 
    @unsafe AS unsafe_html,
    CONVERT(NVARCHAR(MAX), '') AS safe_html,
    1 AS pos
  WHERE @unsafe IS NOT NULL AND LEN(@unsafe) > 0 
  UNION ALL
  -- 
  -- Create a loop: 
  -- The anchor row starts at position one.
  -- Increment the position by one for each pass.
  -- Stop when the position value is equal to the string lenth.
  -- Evaluate the character in each string
  -- If the ASCII value > 128, use the &# format.
  -- Otherwise, check for 5 special characters: " & ' < >
  -- Use the encoding reference or just the original character
  --
  SELECT 
    @unsafe AS unsafe_html,
    CONVERT(NVARCHAR(MAX), safe_html + 
    CASE WHEN UNICODE(SUBSTRING(unsafe_html, pos, 1)) > 128 
         THEN '&#' + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(unsafe_html, pos, 1)))  
         ELSE CASE SUBSTRING(unsafe_html, pos, 1)
              WHEN '"'  THEN '&quot' 
              WHEN '&'  THEN '&amp'
              WHEN '''' THEN '&apos'
              WHEN '<'  THEN '&lt'
              WHEN '>'  THEN '&gt'
              ELSE SUBSTRING(unsafe_html, pos, 1)
              END 
         END ) AS safe_html,
    pos + 1 AS pos
  FROM cte
  WHERE pos <= LEN(@unsafe)
) 
--
-- Each pass through the string creates a row in the CTE
-- The last row will have the position value of the string length + 1
-- Use that row as the safe html string
-- SQL Server allows a max recursion of 32767
-- 
SELECT @safe = (
  SELECT safe_html 
  FROM cte
  WHERE pos = LEN(@unsafe) + 1
) 
OPTION (MAXRECURSION 32767) 

SELECT @safe

-- html&aposs encoding &quotmethod&quot is &lt= or &gt= &amp 1234 &#129

Първоначална версия:

DECLARE @s NVARCHAR(100)

SET @s = '<html>unsafe & safe<html>'
SELECT @s 
SELECT (SELECT @s FOR XML PATH(''))

---------------------------------------
<html>unsafe & safe<html>

-----------------------------------------
&lt;html&gt;unsafe &amp; safe&lt;html&gt;

Пълно кодиране с всички официални препратки:

DECLARE 
    @unsafe  NVARCHAR(MAX),
    @safe NVARCHAR(MAX) 

-- Build string with first 10,000 unicode chars
SELECT @unsafe = COALESCE(@unsafe, '') + NCHAR(number) + ' ' 
FROM (
    SELECT TOP 10000 ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
    FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2 
) t

-- Build table variable with character entity references defined in HTML 4.0
-- Reference: http://www.htmlcodetutorial.com/characterentities_famsupp_69.html
DECLARE @t TABLE (
    name NVARCHAR(25) NOT NULL, 
    unicode_val INT NOT NULL PRIMARY KEY 
) 

INSERT @t 
VALUES
('&quot', 34),
('&amp', 38),
('&apos', 39),
('&lt', 60),
('&gt', 62),
('&nbsp', 160),
('&iexcl', 161),
('&cent', 162),
('&pound', 163),
('&curren', 164),
('&yen', 165),
('&brvbar', 166),
('&sect', 167),
('&uml', 168),
('&copy', 169),
('&ordf', 170),
('&laquo', 171),
('&not', 172),
('&shy', 173),
('&reg', 174),
('&macr', 175),
('&deg', 176),
('&plusmn', 177),
('&sup2', 178),
('&sup3', 179),
('&acute', 180),
('&micro', 181),
('&para', 182),
('&middot', 183),
('&cedil', 184),
('&sup1', 185),
('&ordm', 186),
('&raquo', 187),
('&frac14', 188),
('&frac12', 189),
('&frac34', 190),
('&iquest', 191),
('&Agrave', 192),
('&Aacute', 193),
('&Acirc', 194),
('&Atilde', 195),
('&Auml', 196),
('&Aring', 197),
('&AElig', 198),
('&Ccedil', 199),
('&Egrave', 200),
('&Eacute', 201),
('&Ecirc', 202),
('&Euml', 203),
('&Igrave', 204),
('&Iacute', 205),
('&Icirc', 206),
('&Iuml', 207),
('&ETH', 208),
('&Ntilde', 209),
('&Ograve', 210),
('&Oacute', 211),
('&Ocirc', 212),
('&Otilde', 213),
('&Ouml', 214),
('&times', 215),
('&Oslash', 216),
('&Ugrave', 217),
('&Uacute', 218),
('&Ucirc', 219),
('&Uuml', 220),
('&Yacute', 221),
('&THORN', 222),
('&szlig', 223),
('&agrave', 224),
('&aacute', 225),
('&acirc', 226),
('&atilde', 227),
('&auml', 228),
('&aring', 229),
('&aelig', 230),
('&ccedil', 231),
('&egrave', 232),
('&eacute', 233),
('&ecirc', 234),
('&euml', 235),
('&igrave', 236),
('&iacute', 237),
('&icirc', 238),
('&iuml', 239),
('&eth', 240),
('&ntilde', 241),
('&ograve', 242),
('&oacute', 243),
('&ocirc', 244),
('&otilde', 245),
('&ouml', 246),
('&divide', 247),
('&oslash', 248),
('&ugrave', 249),
('&uacute', 250),
('&ucirc', 251),
('&uuml', 252),
('&yacute', 253),
('&thorn', 254),
('&yuml', 255),
('&OElig', 338),
('&oelig', 339),
('&Scaron', 352),
('&scaron', 353),
('&Yuml', 376),
('&fnof', 402),
('&circ', 710),
('&tilde', 732),
('&Alpha', 913),
('&Beta', 914),
('&Gamma', 915),
('&Delta', 916),
('&Epsilon', 917),
('&Zeta', 918),
('&Eta', 919),
('&Theta', 920),
('&Iota', 921),
('&Kappa', 922),
('&Lambda', 923),
('&Mu', 924),
('&Nu', 925),
('&Xi', 926),
('&Omicron', 927),
('&Pi', 928),
('&Rho', 929),
('&Sigma', 931),
('&Tau', 932),
('&Upsilon', 933),
('&Phi', 934),
('&Chi', 935),
('&Psi', 936),
('&Omega', 937),
('&alpha', 945),
('&beta', 946),
('&gamma', 947),
('&delta', 948),
('&epsilon', 949),
('&zeta', 950),
('&eta', 951),
('&theta', 952),
('&iota', 953),
('&kappa', 954),
('&lambda', 955),
('&mu', 956),
('&nu', 957),
('&xi', 958),
('&omicron', 959),
('&pi', 960),
('&rho', 961),
('&sigmaf', 962),
('&sigma', 963),
('&tau', 964),
('&upsilon', 965),
('&phi', 966),
('&chi', 967),
('&psi', 968),
('&omega', 969),
('&thetasym', 977),
('&upsih', 978),
('&piv', 982),
('&ensp', 8194),
('&emsp', 8195),
('&thinsp', 8201),
('&zwnj', 8204),
('&zwj', 8205),
('&lrm', 8206),
('&rlm', 8207),
('&ndash', 8211),
('&mdash', 8212),
('&lsquo', 8216),
('&rsquo', 8217),
('&sbquo', 8218),
('&ldquo', 8220),
('&rdquo', 8221),
('&bdquo', 8222),
('&dagger', 8224),
('&Dagger', 8225),
('&bull', 8226),
('&hellip', 8230),
('&permil', 8240),
('&prime', 8242),
('&Prime', 8243),
('&lsaquo', 8249),
('&rsaquo', 8250),
('&oline', 8254),
('&frasl', 8260),
('&euro', 8364),
('&image', 8465),
('&weierp', 8472),
('&real', 8476),
('&trade', 8482),
('&alefsym', 8501),
('&larr', 8592),
('&uarr', 8593),
('&rarr', 8594),
('&darr', 8595),
('&harr', 8596),
('&crarr', 8629),
('&lArr', 8656),
('&uArr', 8657),
('&rArr', 8658),
('&dArr', 8659),
('&hArr', 8660),
('&forall', 8704),
('&part', 8706),
('&exist', 8707),
('&empty', 8709),
('&nabla', 8711),
('&isin', 8712),
('&notin', 8713),
('&ni', 8715),
('&prod', 8719),
('&sum', 8721),
('&minus', 8722),
('&lowast', 8727),
('&radic', 8730),
('&prop', 8733),
('&infin', 8734),
('&ang', 8736),
('&and', 8743),
('&or', 8744),
('&cap', 8745),
('&cup', 8746),
('&int', 8747),
('&there4', 8756),
('&sim', 8764),
('&cong', 8773),
('&asymp', 8776),
('&ne', 8800),
('&equiv', 8801),
('&le', 8804),
('&ge', 8805),
('&sub', 8834),
('&sup', 8835),
('&nsub', 8836),
('&sube', 8838),
('&supe', 8839),
('&oplus', 8853),
('&otimes', 8855),
('&perp', 8869),
('&sdot', 8901),
('&lceil', 8968),
('&rceil', 8969),
('&lfloor', 8970),
('&rfloor', 8971),
('&lang', 9001),
('&rang', 9002),
('&loz', 9674),
('&spades', 9824),
('&clubs', 9827),
('&hearts', 9829),
('&diams', 9830)

-- Build numbers table to parse the string
DECLARE @numbers TABLE (number INT NOT NULL PRIMARY KEY) 
INSERT @numbers
SELECT TOP (LEN(@unsafe)) ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2

-- Use numbers table to parse each character.
-- If a match is found in character entity reference table,
-- then use the safe substitute. Otherwise, if the unicode
-- value is greater than 128, use &#<unicode char value>.
-- Finally, use the original character if nothing else
-- is a match
SELECT @safe = COALESCE(@safe,'') 
       + COALESCE(name, 
         CASE WHEN UNICODE(SUBSTRING(@unsafe, number, 1)) > 128 THEN '&#' 
          + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(@unsafe, number, 1))) 
          ELSE SUBSTRING(@unsafe, number, 1) END)
FROM @numbers 
LEFT OUTER JOIN @t  
  ON UNICODE(SUBSTRING(@unsafe, number, 1)) = unicode_val

SELECT @safe AS [safe]

Results:  
! &quot # $ % &amp &apos ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; 
&lt = &gt ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
[ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { 
| } ~   &#129 &#130 &#131 &#132 &#133 &#134 &#135 &#136 &#137 &#138 
&#139 &#140 &#141 &#142 &#143 &#144 &#145 &#146 &#147 &#148 &#149 
&#150 &#151 &#152 &#153 &#154 &#155 &#156 &#157 &#158 &#159 &nbsp 
&iexcl &cent &pound &curren &yen &brvbar &sect &uml &copy &ordf 
&laquo &not &shy &reg &macr &deg &plusmn &sup2 &sup3 &acute &micro 
&para &middot &cedil &sup1 &ordm &raquo &frac14 &frac12 &frac34 
&iquest &Agrave &Aacute &Acirc &Atilde &Auml &Aring &AElig &Ccedil 
&Egrave &Eacute &Ecirc &Euml &Igrave &Iacute &Icirc &Iuml &ETH &Ntilde 
&Ograve &Oacute &Ocirc &Otilde &Ouml &times &Oslash &Ugrave &Uacute 
&Ucirc &Uuml &Yacute &THORN &szlig &agrave &aacute &acirc &atilde &auml 
&aring &aelig &ccedil &egrave &eacute &ecirc &euml &igrave &iacute &icirc 
&iuml &eth &ntilde &ograve &oacute &ocirc &otilde &ouml &divide &oslash 
&ugrave &uacute &ucirc &uuml &yacute &thorn &yuml &#256 &#257 &#258 &#259 
&#260 &#261 &#262 &#263 &#264 &#265 &#266...


  1. Database
  2.   
  3. Mysql
  4.   
  5. Oracle
  6.   
  7. Sqlserver
  8.   
  9. PostgreSQL
  10.   
  11. Access
  12.   
  13. SQLite
  14.   
  15. MariaDB
  1. sql проблем със заявка при докладване

  2. Най-бързият начин за премахване на нечислови знаци от VARCHAR в SQL Server

  3. Как да възстановя изгледа в SQL Server 2008

  4. ModuleNotFoundError:Няма модул с име „pyodbc“ при импортиране на pyodbc в py скрипт

  5. Разбиране на размера на съхранение „datetime2“ в SQL Server