Added method in ParseUtil that strips out every invalid XML character from a string.

Added a test for ParseUtil.RemoveInvalidXmlChars() using a snippet of RSS from XSeeds that was originally causing problems.
This commit is contained in:
Jay Otterbein
2016-11-03 20:23:12 -05:00
committed by kaso17
parent 72d3f2ea49
commit d1e767bd41
5 changed files with 83 additions and 1 deletions

View File

@@ -164,6 +164,7 @@
<Compile Include="TestIIndexerManagerServiceHelper.cs" />
<Compile Include="TestUtil.cs" />
<Compile Include="TestWebClient.cs" />
<Compile Include="Util\ParseUtilTests.cs" />
<Compile Include="Util\ServerUtilTests.cs" />
<Compile Include="Util\TvCategoryParserTests.cs" />
</ItemGroup>
@@ -182,6 +183,9 @@
<ItemGroup>
<Folder Include="Indexers\" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Util\Invalid-RSS.xml" />
</ItemGroup>
<Choose>
<When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
<ItemGroup>

View File

@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>RSS Syndicator</title>
<link>http://somewebsite.com</link>
<description>
&lt;br /&gt;
Enjoy!&lt;br /&gt;
&lt;br /&gt;
-&lt;br /&gt;
&lt;br /&gt;
 group info&lt;br /&gt;
&lt;br /&gt;
Know Your Role and Shut Your Mouth!&lt;br /&gt;
&lt;br /&gt;
 we are now looking for...&lt;br /&gt;
&lt;br /&gt;
</description>
</channel>
</rss>

View File

@@ -0,0 +1,46 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Linq;
using System.Xml.XPath;
using FluentAssertions;
using Jackett.Utils;
using NUnit.Framework;
namespace JackettTest.Util
{
/// <summary>
/// Tests for <c>ParseUtil.RemoveInvalidXmlChars</c>, driven by an embedded RSS
/// snippet that contains characters that are illegal in XML.
/// </summary>
[TestFixture]
public class ParseUtilTests
{
    /// <summary>
    /// Reads the embedded <c>Invalid-RSS.xml</c> fixture into a string.
    /// The resource is looked up as <c>{namespace of this class}.Invalid-RSS.xml</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The embedded resource could not be found in the test assembly.
    /// </exception>
    private static string InvalidRssXml
    {
        get
        {
            var type = typeof(ParseUtilTests);
            var resourceName = $"{type.Namespace}.Invalid-RSS.xml";

            using (var resourceStream = type.Assembly.GetManifestResourceStream(resourceName))
            {
                // GetManifestResourceStream returns null (it does not throw) when the
                // name does not match; fail with a message that names the resource
                // instead of an ArgumentNullException from StreamReader.
                if (resourceStream == null)
                {
                    throw new InvalidOperationException(
                        $"Embedded resource '{resourceName}' was not found in assembly '{type.Assembly.FullName}'.");
                }

                using (var reader = new StreamReader(resourceStream))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }

    /// <summary>
    /// Verifies that the raw fixture is rejected by <see cref="XDocument.Parse(string)"/>
    /// because of a 0x07 character, and that the same text parses once it has been
    /// passed through <c>ParseUtil.RemoveInvalidXmlChars</c>.
    /// </summary>
    [Test]
    public void Invalid_RSS_should_parse_after_removing_invalid_chars()
    {
        var invalidRss = InvalidRssXml;

        // Precondition: the fixture really is invalid XML. Asserting the specific
        // XmlException keeps an unrelated failure from satisfying the check.
        Action parseAction = () => XDocument.Parse(invalidRss);
        parseAction.ShouldThrow<System.Xml.XmlException>().WithMessage("'\a', hexadecimal value 0x07, is an invalid character. Line 12, position 7.");

        var validRss = ParseUtil.RemoveInvalidXmlChars(invalidRss);
        var rssDoc = XDocument.Parse(validRss);

        rssDoc.Root.Should().NotBeNull();
        var description = rssDoc.Root.XPathSelectElement("//description");
        description.Value.Should().Contain("Know Your Role and Shut Your Mouth!");
    }
}
}