diff --git a/src/Jackett.Test/Jackett.Test.csproj b/src/Jackett.Test/Jackett.Test.csproj
index ec1fff28d..2fd2fa0a2 100644
--- a/src/Jackett.Test/Jackett.Test.csproj
+++ b/src/Jackett.Test/Jackett.Test.csproj
@@ -164,6 +164,7 @@
+
@@ -182,6 +183,9 @@
+
+
+
diff --git a/src/Jackett.Test/Util/Invalid-RSS.xml b/src/Jackett.Test/Util/Invalid-RSS.xml
new file mode 100644
index 000000000..5ae311fa4
--- /dev/null
+++ b/src/Jackett.Test/Util/Invalid-RSS.xml
@@ -0,0 +1,20 @@
+
+
+
+ RSS Syndicator
+ http://somewebsite.com
+
+ <br />
+ Enjoy!<br />
+ <br />
+ -<br />
+ <br />
+  group info<br />
+ <br />
+ Know Your Role and Shut Your Mouth!<br />
+ <br />
+  we are now looking for...<br />
+ <br />
+
+
+
\ No newline at end of file
diff --git a/src/Jackett.Test/Util/ParseUtilTests.cs b/src/Jackett.Test/Util/ParseUtilTests.cs
new file mode 100644
index 000000000..a734e6e88
--- /dev/null
+++ b/src/Jackett.Test/Util/ParseUtilTests.cs
@@ -0,0 +1,72 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.Xml;
+using System.Xml.Linq;
+using System.Xml.XPath;
+using FluentAssertions;
+using Jackett.Utils;
+using NUnit.Framework;
+
+namespace JackettTest.Util
+{
+    /// <summary>
+    /// Tests for <see cref="ParseUtil"/>.
+    /// </summary>
+    [TestFixture]
+    public class ParseUtilTests
+    {
+        /// <summary>
+        /// Contents of the embedded <c>Invalid-RSS.xml</c> fixture, which the test below
+        /// expects to contain a 0x07 character that the XML parser rejects.
+        /// </summary>
+        private static string InvalidRssXml
+        {
+            get
+            {
+                var type = typeof(ParseUtilTests);
+                var resourceName = $"{type.Namespace}.Invalid-RSS.xml";
+                using (var resourceStream = type.Assembly.GetManifestResourceStream(resourceName))
+                {
+                    // GetManifestResourceStream returns null (it does not throw) when the
+                    // resource is missing; fail with the name that was looked up instead of
+                    // an ArgumentNullException from StreamReader.
+                    if (resourceStream == null)
+                    {
+                        throw new InvalidOperationException($"Embedded resource '{resourceName}' was not found.");
+                    }
+
+                    using (var sr = new StreamReader(resourceStream))
+                    {
+                        return sr.ReadToEnd();
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// The raw fixture must be rejected by <see cref="XDocument.Parse(string)"/> and
+        /// must parse once <c>ParseUtil.RemoveInvalidXmlChars</c> has been applied to it.
+        /// </summary>
+        [Test]
+        public void Invalid_RSS_should_parse_after_removing_invalid_chars()
+        {
+            var invalidRss = InvalidRssXml;
+            Action parseAction = () => XDocument.Parse(invalidRss);
+            // '*' is a WithMessage wildcard: it keeps the literal on a single line and does
+            // not depend on the whitespace between the message and its position suffix.
+            parseAction.ShouldThrow<XmlException>()
+                .WithMessage("'\a', hexadecimal value 0x07, is an invalid character.*Line 12, position 7.");
+
+            var validRss = ParseUtil.RemoveInvalidXmlChars(invalidRss);
+            var rssDoc = XDocument.Parse(validRss);
+            rssDoc.Root.Should().NotBeNull();
+            var description = rssDoc.Root.XPathSelectElement("//description");
+            description.Should().NotBeNull();
+            description.Value.Should().Contain("Know Your Role and Shut Your Mouth!");
+        }
+    }
+}
diff --git a/src/Jackett/Indexers/XSpeeds.cs b/src/Jackett/Indexers/XSpeeds.cs
index d0bfccba6..e8f9515f0 100644
--- a/src/Jackett/Indexers/XSpeeds.cs
+++ b/src/Jackett/Indexers/XSpeeds.cs
@@ -13,6 +13,7 @@ using System.Threading.Tasks;
 using Jackett.Models.IndexerConfig;
 using System.Text.RegularExpressions;
 using System.Xml.Linq;
+using static Jackett.Utils.ParseUtil;
 
 namespace Jackett.Indexers
 {
@@ -128,7 +129,7 @@ namespace Jackett.Indexers
                 if (rssPage.Content.EndsWith("\0")) {
                     rssPage.Content = rssPage.Content.Substring(0, rssPage.Content.Length - 1);
                 }
-                rssPage.Content = rssPage.Content.Replace("\0x10", "").Replace("\0x07", "");
+                rssPage.Content = RemoveInvalidXmlChars(rssPage.Content);
                 var rssDoc = XDocument.Parse(rssPage.Content);
 
                 foreach (var item in rssDoc.Descendants("item"))
diff --git a/src/Jackett/Utils/ParseUtil.cs b/src/Jackett/Utils/ParseUtil.cs
index 89bb8693d..2bdbed60a 100644
--- a/src/Jackett/Utils/ParseUtil.cs
+++ b/src/Jackett/Utils/ParseUtil.cs
@@ -1,9 +1,15 @@
 using System.Globalization;
+using System.Text.RegularExpressions;
 
 namespace Jackett.Utils
 {
     public static class ParseUtil
     {
+        private static readonly Regex InvalidXmlChars =
+            new Regex(
+                @"(?