Added method in ParseUtil that strips out every invalid XML character from a string.

Added test for ParseUtil.RemoveInvalidXmlChars() using a snippet of RSS from XSeeds that was originally causing problems.
This commit is contained in:
Jay Otterbein
2016-11-03 20:23:12 -05:00
committed by kaso17
parent 72d3f2ea49
commit d1e767bd41
5 changed files with 83 additions and 1 deletions

View File

@@ -1,9 +1,15 @@
using System.Globalization;
using System.Text.RegularExpressions;
namespace Jackett.Utils
{
public static class ParseUtil
{
// Matches each UTF-16 code unit that RemoveInvalidXmlChars deletes. The pattern has three alternatives:
//   1. a low surrogate (U+DC00-U+DFFF) that is not preceded by a high surrogate;
//   2. a high surrogate (U+D800-U+DBFF) that is not followed by a low surrogate;
//   3. U+0000-U+0008, U+000B, U+000C, U+000E-U+001F, U+007F-U+009F, U+FEFF, U+FFFE and U+FFFF.
// Tab (U+0009), LF (U+000A), CR (U+000D) and well-formed surrogate pairs are not matched.
// NOTE(review): U+007F-U+009F and U+FEFF appear to be permitted by the XML 1.0 Char production,
// so this is stricter than "invalid XML" in the narrow sense - confirm that stripping them is intended.
// The regex is compiled once and shared through this static field.
private static readonly Regex InvalidXmlChars =
new Regex(
@"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFEFF\uFFFE\uFFFF]",
RegexOptions.Compiled);
public static string NormalizeSpace(string s)
{
return s.Trim();
@@ -17,6 +23,11 @@ namespace Jackett.Utils
return normalized;
}
/// <summary>
/// Deletes every match of the InvalidXmlChars pattern from <paramref name="text"/>.
/// </summary>
/// <param name="text">The string to clean; may be null or empty.</param>
/// <returns>
/// An empty string when <paramref name="text"/> is null or empty; otherwise
/// <paramref name="text"/> with all matched characters removed.
/// </returns>
public static string RemoveInvalidXmlChars(string text)
{
    // Null and empty inputs both yield an empty string, so this method never returns null.
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    return InvalidXmlChars.Replace(text, string.Empty);
}
public static double CoerceDouble(string str)
{
return double.Parse(NormalizeNumber(str), NumberStyles.Any, CultureInfo.InvariantCulture);