mirror of
https://github.com/Jackett/Jackett.git
synced 2025-09-17 17:34:09 +02:00
Added method in ParseUtil that strips out every invalid XML character from a string.
Added a test for ParseUtil.RemoveInvalidXmlChars() using a snippet of RSS from XSeeds that was originally causing problems.
This commit is contained in:
@@ -164,6 +164,7 @@
|
|||||||
<Compile Include="TestIIndexerManagerServiceHelper.cs" />
|
<Compile Include="TestIIndexerManagerServiceHelper.cs" />
|
||||||
<Compile Include="TestUtil.cs" />
|
<Compile Include="TestUtil.cs" />
|
||||||
<Compile Include="TestWebClient.cs" />
|
<Compile Include="TestWebClient.cs" />
|
||||||
|
<Compile Include="Util\ParseUtilTests.cs" />
|
||||||
<Compile Include="Util\ServerUtilTests.cs" />
|
<Compile Include="Util\ServerUtilTests.cs" />
|
||||||
<Compile Include="Util\TvCategoryParserTests.cs" />
|
<Compile Include="Util\TvCategoryParserTests.cs" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
@@ -182,6 +183,9 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Folder Include="Indexers\" />
|
<Folder Include="Indexers\" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<EmbeddedResource Include="Util\Invalid-RSS.xml" />
|
||||||
|
</ItemGroup>
|
||||||
<Choose>
|
<Choose>
|
||||||
<When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
|
<When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
20
src/Jackett.Test/Util/Invalid-RSS.xml
Normal file
20
src/Jackett.Test/Util/Invalid-RSS.xml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>RSS Syndicator</title>
|
||||||
|
<link>http://somewebsite.com</link>
|
||||||
|
<description>
|
||||||
|
<br />
|
||||||
|
Enjoy!<br />
|
||||||
|
<br />
|
||||||
|
-<br />
|
||||||
|
<br />
|
||||||
|
group info<br />
|
||||||
|
<br />
|
||||||
|
Know Your Role and Shut Your Mouth!<br />
|
||||||
|
<br />
|
||||||
|
we are now looking for...<br />
|
||||||
|
<br />
|
||||||
|
</description>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
46
src/Jackett.Test/Util/ParseUtilTests.cs
Normal file
46
src/Jackett.Test/Util/ParseUtilTests.cs
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Xml.Linq;
|
||||||
|
using System.Xml.XPath;
|
||||||
|
using FluentAssertions;
|
||||||
|
using Jackett.Utils;
|
||||||
|
using NUnit.Framework;
|
||||||
|
|
||||||
|
namespace JackettTest.Util
{
    /// <summary>
    /// Tests for <see cref="ParseUtil"/>.
    /// </summary>
    [TestFixture]
    public class ParseUtilTests
    {
        /// <summary>
        /// Gets the text of the <c>Invalid-RSS.xml</c> resource embedded in the test assembly.
        /// The resource is looked up under this fixture's namespace.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The embedded resource could not be located in the test assembly.
        /// </exception>
        private static string InvalidRssXml
        {
            get
            {
                var type = typeof(ParseUtilTests);
                var resourceName = $"{type.Namespace}.Invalid-RSS.xml";

                using (var resourceStream = type.Assembly.GetManifestResourceStream(resourceName))
                {
                    // GetManifestResourceStream returns null (it does not throw) when the
                    // resource is missing; fail with a message that names the resource
                    // instead of letting StreamReader raise an opaque null-argument error.
                    if (resourceStream == null)
                    {
                        throw new FileNotFoundException(
                            $"Embedded resource '{resourceName}' was not found in assembly '{type.Assembly.FullName}'.",
                            resourceName);
                    }

                    using (var sr = new StreamReader(resourceStream))
                    {
                        return sr.ReadToEnd();
                    }
                }
            }
        }

        /// <summary>
        /// The raw fixture must be rejected by <see cref="XDocument.Parse(string)"/> because of
        /// the 0x07 character, and must parse once it has been passed through
        /// <see cref="ParseUtil.RemoveInvalidXmlChars"/>.
        /// </summary>
        [Test]
        public void Invalid_RSS_should_parse_after_removing_invalid_chars()
        {
            var invalidRss = InvalidRssXml;
            Action parseAction = () => XDocument.Parse(invalidRss);

            // Assert on the exception type and its structured location rather than on the
            // full message text, which is localized and may differ between runtimes.
            var parseError = parseAction.ShouldThrow<System.Xml.XmlException>().And;
            parseError.Message.Should().Contain("0x07");
            parseError.LineNumber.Should().Be(12);
            parseError.LinePosition.Should().Be(7);

            var validRss = ParseUtil.RemoveInvalidXmlChars(invalidRss);
            var rssDoc = XDocument.Parse(validRss);
            rssDoc.Root.Should().NotBeNull();

            var description = rssDoc.Root.XPathSelectElement("//description");
            description.Should().NotBeNull();
            description.Value.Should().Contain("Know Your Role and Shut Your Mouth!");
        }
    }
}
|
@@ -13,6 +13,7 @@ using System.Threading.Tasks;
|
|||||||
using Jackett.Models.IndexerConfig;
|
using Jackett.Models.IndexerConfig;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using System.Xml.Linq;
|
using System.Xml.Linq;
|
||||||
|
using static Jackett.Utils.ParseUtil;
|
||||||
|
|
||||||
namespace Jackett.Indexers
|
namespace Jackett.Indexers
|
||||||
{
|
{
|
||||||
@@ -128,7 +129,7 @@ namespace Jackett.Indexers
|
|||||||
if (rssPage.Content.EndsWith("\0")) {
|
if (rssPage.Content.EndsWith("\0")) {
|
||||||
rssPage.Content = rssPage.Content.Substring(0, rssPage.Content.Length - 1);
|
rssPage.Content = rssPage.Content.Substring(0, rssPage.Content.Length - 1);
|
||||||
}
|
}
|
||||||
rssPage.Content = rssPage.Content.Replace("\0x10", "").Replace("\0x07", "");
|
rssPage.Content = RemoveInvalidXmlChars(rssPage.Content);
|
||||||
var rssDoc = XDocument.Parse(rssPage.Content);
|
var rssDoc = XDocument.Parse(rssPage.Content);
|
||||||
|
|
||||||
foreach (var item in rssDoc.Descendants("item"))
|
foreach (var item in rssDoc.Descendants("item"))
|
||||||
|
@@ -1,9 +1,15 @@
|
|||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace Jackett.Utils
|
namespace Jackett.Utils
|
||||||
{
|
{
|
||||||
public static class ParseUtil
|
public static class ParseUtil
|
||||||
{
|
{
|
||||||
|
/// <summary>
/// Matches each UTF-16 code unit that <c>RemoveInvalidXmlChars</c> strips from its input:
/// a low surrogate not preceded by a high surrogate, a high surrogate not followed by a
/// low surrogate, the control characters U+0000-U+0008, U+000B, U+000C and U+000E-U+001F,
/// the range U+007F-U+009F, and the code points U+FEFF, U+FFFE and U+FFFF.
/// </summary>
/// <remarks>
/// NOTE(review): U+007F-U+009F and U+FEFF fall inside the XML 1.0 <c>Char</c> production,
/// so this pattern removes slightly more than the strictly invalid characters - confirm
/// that this is intended before relying on those characters surviving.
/// </remarks>
private static readonly Regex InvalidXmlChars = new Regex(
    @"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFEFF\uFFFE\uFFFF]",
    RegexOptions.Compiled);
|
||||||
|
|
||||||
public static string NormalizeSpace(string s)
|
public static string NormalizeSpace(string s)
|
||||||
{
|
{
|
||||||
return s.Trim();
|
return s.Trim();
|
||||||
@@ -17,6 +23,11 @@ namespace Jackett.Utils
|
|||||||
return normalized;
|
return normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns <paramref name="text"/> with every match of the <c>InvalidXmlChars</c>
/// pattern removed.
/// </summary>
/// <param name="text">The text to clean; may be <c>null</c> or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="text"/> is <c>null</c> or empty.
/// </returns>
public static string RemoveInvalidXmlChars(string text)
{
    // Null and empty input both collapse to an empty string, so callers never get null back.
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    return InvalidXmlChars.Replace(text, "");
}
|
||||||
|
|
||||||
public static double CoerceDouble(string str)
|
public static double CoerceDouble(string str)
|
||||||
{
|
{
|
||||||
return double.Parse(NormalizeNumber(str), NumberStyles.Any, CultureInfo.InvariantCulture);
|
return double.Parse(NormalizeNumber(str), NumberStyles.Any, CultureInfo.InvariantCulture);
|
||||||
|
Reference in New Issue
Block a user