From 9cbc78b1c213a51358e4f57ae21148b9b717bae5 Mon Sep 17 00:00:00 2001 From: Diego Heras Date: Mon, 13 Apr 2020 06:22:50 +0200 Subject: [PATCH] core: fix cookie parsing (part 2) (#8150) * core: fix cookie parsing (part 2) After fixing cookie storage in #8133 I noticed that I still have a lot of '.json.bak' files in the Jackett configuration folder. After deleting them they were created again in each request. The cause was we were parsing bad the cookies with '=' character in the value. Most Cloudflare cookies include if so we were sending bad cookies and solving the callenge in each request. This PR should increase performance in several ways: we are not solving the challenge again (it takes time), we are not making extra requests and we are not updating the Jackett configuration in each request (both files '.json' and '.json.bak'). Tested with the client HttpWebClient2NetCore only. Please do some tests with the site 1337x. --- src/Jackett.Common/Indexers/BaseIndexer.cs | 22 ++-- .../Utils/Clients/HttpWebClient.cs | 23 ++-- .../Utils/Clients/HttpWebClient2.cs | 26 ++--- .../Utils/Clients/HttpWebClient2NetCore.cs | 24 ++-- .../Utils/Clients/HttpWebClientNetCore.cs | 23 ++-- src/Jackett.Common/Utils/CookieUtil.cs | 41 +++++++ src/Jackett.Test/Utils/CookieUtilTests.cs | 104 ++++++++++++++++++ 7 files changed, 183 insertions(+), 80 deletions(-) create mode 100644 src/Jackett.Common/Utils/CookieUtil.cs create mode 100644 src/Jackett.Test/Utils/CookieUtilTests.cs diff --git a/src/Jackett.Common/Indexers/BaseIndexer.cs b/src/Jackett.Common/Indexers/BaseIndexer.cs index c587bcfc6..ca3857e4a 100644 --- a/src/Jackett.Common/Indexers/BaseIndexer.cs +++ b/src/Jackett.Common/Indexers/BaseIndexer.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; using System.Linq; using System.Text; -using System.Text.RegularExpressions; using System.Threading.Tasks; using AutoMapper; using Jackett.Common.Models; @@ -350,9 +349,6 @@ namespace Jackett.Common.Indexers public abstract class BaseWebIndexer : BaseIndexer, IWebIndexer { - // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie - private readonly Regex _cookieRegex = new Regex(@"([^\(\)<>@,;:\\""/\[\]\?=\{\}\s]+)=([^,;\\""\s]+)"); - protected BaseWebIndexer(string name, string link, string description, IIndexerConfigurationService configService, WebClient client, Logger logger, ConfigurationData configData, IProtectionService p, TorznabCapabilities caps = null, string downloadBase = null) : base(name, link, description, configService, logger, configData, p) { @@ -610,17 +606,13 @@ namespace Jackett.Common.Indexers private string ResolveCookies(string incomingCookies = "") { var redirRequestCookies = string.IsNullOrWhiteSpace(CookieHeader) ? incomingCookies : CookieHeader + " " + incomingCookies; - var cookieDictionary = new Dictionary(); - var matches = _cookieRegex.Match(redirRequestCookies); - while (matches.Success) - { - if (matches.Groups.Count > 2) - cookieDictionary[matches.Groups[1].Value] = matches.Groups[2].Value; - matches = matches.NextMatch(); - } - return string.Join("; ", cookieDictionary - .Where(kv => kv.Key != "cf_use_ob" && kv.Key != "cf_ob_info") // These cookies are causing BadGateway errors, so we drop them, see issue #2306 - .Select(kv => kv.Key.ToString() + "=" + kv.Value.ToString()).ToArray()); + var cookieDictionary = CookieUtil.CookieHeaderToDictionary(redirRequestCookies); + + // These cookies are causing BadGateway errors, so we drop them, see issue #2306 + cookieDictionary.Remove("cf_use_ob"); + cookieDictionary.Remove("cf_ob_info"); + + return CookieUtil.CookieDictionaryToHeader(cookieDictionary); } // Update CookieHeader with new cookies and save the config if something changed (e.g. a new CloudFlare clearance cookie was issued) diff --git a/src/Jackett.Common/Utils/Clients/HttpWebClient.cs b/src/Jackett.Common/Utils/Clients/HttpWebClient.cs index 70f97225a..3c9ae9f7d 100644 --- a/src/Jackett.Common/Utils/Clients/HttpWebClient.cs +++ b/src/Jackett.Common/Utils/Clients/HttpWebClient.cs @@ -133,21 +133,14 @@ namespace Jackett.Common.Utils.Clients ServicePointManager.SecurityProtocol = (SecurityProtocolType)192 | (SecurityProtocolType)768 | (SecurityProtocolType)3072; var cookies = new CookieContainer(); - if (!string.IsNullOrEmpty(webRequest.Cookies)) + if (!string.IsNullOrWhiteSpace(webRequest.Cookies)) { - var uri = new Uri(webRequest.Url); - var cookieUrl = new Uri(uri.Scheme + "://" + uri.Host); // don't include the path, Scheme is needed for mono compatibility - foreach (var c in webRequest.Cookies.Split(';')) - { - try - { - cookies.SetCookies(cookieUrl, c.Trim()); - } - catch (CookieException ex) - { - logger.Info("(Non-critical) Problem loading cookie {0}, {1}, {2}", uri, c, ex.Message); - } - } + // don't include the path, Scheme is needed for mono compatibility + var requestUri = new Uri(webRequest.Url); + var cookieUrl = new Uri(requestUri.Scheme + "://" + requestUri.Host); + var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies); + foreach (var kv in cookieDictionary) + cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value)); } var userAgent = webRequest.EmulateBrowser.Value ? BrowserUtil.ChromeUserAgent : "Jackett/" + configService.GetVersion(); @@ -232,7 +225,7 @@ namespace Jackett.Common.Utils.Clients } // some cloudflare clients are using a refresh header - // Pull it out manually + // Pull it out manually if (response.StatusCode == HttpStatusCode.ServiceUnavailable && response.Headers.Contains("Refresh")) { var refreshHeaders = response.Headers.GetValues("Refresh"); diff --git a/src/Jackett.Common/Utils/Clients/HttpWebClient2.cs b/src/Jackett.Common/Utils/Clients/HttpWebClient2.cs index bd63379b9..76f7ed3c4 100644 --- a/src/Jackett.Common/Utils/Clients/HttpWebClient2.cs +++ b/src/Jackett.Common/Utils/Clients/HttpWebClient2.cs @@ -179,26 +179,16 @@ namespace Jackett.Common.Utils.Clients // clear cookies from cookiecontainer var oldCookies = cookies.GetCookies(request.RequestUri); foreach (Cookie oldCookie in oldCookies) - { oldCookie.Expired = true; - } - if (!string.IsNullOrEmpty(webRequest.Cookies)) + // add cookies to cookiecontainer + if (!string.IsNullOrWhiteSpace(webRequest.Cookies)) { - // add cookies to cookiecontainer - var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host); // don't include the path, Scheme is needed for mono compatibility - foreach (var ccookiestr in webRequest.Cookies.Split(';')) - { - var cookiestrparts = ccookiestr.Split('='); - var name = cookiestrparts[0].Trim(); - if (string.IsNullOrWhiteSpace(name)) - continue; - var value = ""; - if (cookiestrparts.Length >= 2) - value = cookiestrparts[1].Trim(); - var cookie = new Cookie(name, value); - cookies.Add(cookieUrl, cookie); - } + // don't include the path, Scheme is needed for mono compatibility + var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host); + var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies); + foreach (var kv in cookieDictionary) + cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value)); } if (webRequest.Headers != null) @@ -254,7 +244,7 @@ namespace Jackett.Common.Utils.Clients } // some cloudflare clients are using a refresh header - // Pull it out manually + // Pull it out manually if (response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable && response.Headers.Contains("Refresh")) { var refreshHeaders = response.Headers.GetValues("Refresh"); diff --git a/src/Jackett.Common/Utils/Clients/HttpWebClient2NetCore.cs b/src/Jackett.Common/Utils/Clients/HttpWebClient2NetCore.cs index 5fbd5e989..5963d225c 100644 --- a/src/Jackett.Common/Utils/Clients/HttpWebClient2NetCore.cs +++ b/src/Jackett.Common/Utils/Clients/HttpWebClient2NetCore.cs @@ -175,26 +175,16 @@ namespace Jackett.Common.Utils.Clients // clear cookies from cookiecontainer var oldCookies = cookies.GetCookies(request.RequestUri); foreach (Cookie oldCookie in oldCookies) - { oldCookie.Expired = true; - } - if (!string.IsNullOrEmpty(webRequest.Cookies)) + // add cookies to cookiecontainer + if (!string.IsNullOrWhiteSpace(webRequest.Cookies)) { - // add cookies to cookiecontainer - var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host); // don't include the path, Scheme is needed for mono compatibility - foreach (var ccookiestr in webRequest.Cookies.Split(';')) - { - var cookiestrparts = ccookiestr.Split('='); - var name = cookiestrparts[0].Trim(); - if (string.IsNullOrWhiteSpace(name)) - continue; - var value = ""; - if (cookiestrparts.Length >= 2) - value = cookiestrparts[1].Trim(); - var cookie = new Cookie(name, value); - cookies.Add(cookieUrl, cookie); - } + // don't include the path, Scheme is needed for mono compatibility + var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host); + var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies); + foreach (var kv in cookieDictionary) + cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value)); } if (webRequest.Headers != null) diff --git a/src/Jackett.Common/Utils/Clients/HttpWebClientNetCore.cs b/src/Jackett.Common/Utils/Clients/HttpWebClientNetCore.cs index e4828d4ce..175919338 100644 --- a/src/Jackett.Common/Utils/Clients/HttpWebClientNetCore.cs +++ b/src/Jackett.Common/Utils/Clients/HttpWebClientNetCore.cs @@ -129,21 +129,14 @@ namespace Jackett.Common.Utils.Clients ServicePointManager.SecurityProtocol = (SecurityProtocolType)192 | (SecurityProtocolType)768 | (SecurityProtocolType)3072; var cookies = new CookieContainer(); - if (!string.IsNullOrEmpty(webRequest.Cookies)) + if (!string.IsNullOrWhiteSpace(webRequest.Cookies)) { - var uri = new Uri(webRequest.Url); - var cookieUrl = new Uri(uri.Scheme + "://" + uri.Host); // don't include the path, Scheme is needed for mono compatibility - foreach (var c in webRequest.Cookies.Split(';')) - { - try - { - cookies.SetCookies(cookieUrl, c.Trim()); - } - catch (CookieException ex) - { - logger.Info("(Non-critical) Problem loading cookie {0}, {1}, {2}", uri, c, ex.Message); - } - } + // don't include the path, Scheme is needed for mono compatibility + var requestUri = new Uri(webRequest.Url); + var cookieUrl = new Uri(requestUri.Scheme + "://" + requestUri.Host); + var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies); + foreach (var kv in cookieDictionary) + cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value)); } var userAgent = webRequest.EmulateBrowser.Value ? BrowserUtil.ChromeUserAgent : "Jackett/" + configService.GetVersion(); @@ -231,7 +224,7 @@ namespace Jackett.Common.Utils.Clients } // some cloudflare clients are using a refresh header - // Pull it out manually + // Pull it out manually if (response.StatusCode == HttpStatusCode.ServiceUnavailable && response.Headers.Contains("Refresh")) { var refreshHeaders = response.Headers.GetValues("Refresh"); diff --git a/src/Jackett.Common/Utils/CookieUtil.cs b/src/Jackett.Common/Utils/CookieUtil.cs new file mode 100644 index 000000000..0fbe81549 --- /dev/null +++ b/src/Jackett.Common/Utils/CookieUtil.cs @@ -0,0 +1,41 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +namespace Jackett.Common.Utils +{ + public static class CookieUtil + { + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie + // NOTE: we are not checking non-ascii characters and we should + private static readonly Regex _CookieRegex = new Regex(@"([^\(\)<>@,;:\\""/\[\]\?=\{\}\s]+)=([^,;\\""\s]+)"); + private static readonly char[] InvalidKeyChars = {'(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t', '\n'}; + private static readonly char[] InvalidValueChars = {'"', ',', ';', '\\', ' ', '\t', '\n'}; + + public static Dictionary CookieHeaderToDictionary(string cookieHeader) + { + var cookieDictionary = new Dictionary(); + if (cookieHeader == null) + return cookieDictionary; + var matches = _CookieRegex.Match(cookieHeader); + while (matches.Success) + { + if (matches.Groups.Count > 2) + cookieDictionary[matches.Groups[1].Value] = matches.Groups[2].Value; + matches = matches.NextMatch(); + } + return cookieDictionary; + } + + public static string CookieDictionaryToHeader(Dictionary cookieDictionary) + { + if (cookieDictionary == null) + return ""; + foreach (var kv in cookieDictionary) + if (kv.Key.IndexOfAny(InvalidKeyChars) > -1 || kv.Value.IndexOfAny(InvalidValueChars) > -1) + throw new FormatException($"The cookie '{kv.Key}={kv.Value}' is malformed."); + return string.Join("; ", cookieDictionary.Select(kv => kv.Key + "=" + kv.Value)); + } + } +} diff --git a/src/Jackett.Test/Utils/CookieUtilTests.cs b/src/Jackett.Test/Utils/CookieUtilTests.cs new file mode 100644 index 000000000..41f178ac3 --- /dev/null +++ b/src/Jackett.Test/Utils/CookieUtilTests.cs @@ -0,0 +1,104 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Jackett.Common.Utils; +using NUnit.Framework; +using Assert = NUnit.Framework.Assert; +using CollectionAssert = NUnit.Framework.CollectionAssert; + +namespace Jackett.Test.Utils +{ + [TestFixture] + public class CookieUtilTests + { + [Test] + public void CookieHeaderToDictionaryGood() + { + // valid cookies with non-alpha characters in the value + var cookieHeader = "__cfduid=d6237f041586694295; __cf_bm=TlOng/xyqckk-TMen38z+0RFYA7YA="; + var expectedCookieDictionary = new Dictionary + { + {"__cfduid", "d6237f041586694295"}, {"__cf_bm", "TlOng/xyqckk-TMen38z+0RFYA7YA="} + }; + CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(cookieHeader)); + } + + [Test] + public void CookieHeaderToDictionaryDuplicateKeys() + { + // cookie with duplicate keys and whitespace separator instead of ; + // this cookie is not valid according to the standard, but it occurs in Jackett because we are concatenating + // cookies in many parts of the code (and we are not doing it well). this is safe because the whitespace + // can't be part of the key nor the value. + var cookieHeader = "__cfduid=d6237f041586694295; __cf_bm=TlOng/xyqckk-TMen38z+0RFYA7YA= __cf_bm=test"; + var expectedCookieDictionary = new Dictionary + { + {"__cfduid", "d6237f041586694295"}, + {"__cf_bm", "test"} // we always assume the latest value is the most recent + }; + CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(cookieHeader)); + } + + [Test] + public void CookieHeaderToDictionaryMalformed() + { + // malformed cookies + var cookieHeader = "__cfduidd6237f041586694295; __cf_;bm TlOng; good_cookie=value"; + var expectedCookieDictionary = new Dictionary {{"good_cookie", "value"},}; + CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(cookieHeader)); + } + + [Test] + [SuppressMessage("ReSharper", "CollectionNeverUpdated.Local")] + public void CookieHeaderToDictionaryNull() + { + // null cookie header + var expectedCookieDictionary = new Dictionary(); + CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(null)); + } + + [Test] + public void CookieDictionaryToHeaderGood() + { + // valid cookies with non-alpha characters in the value + var cookieDictionary = new Dictionary + { + {"__cfduid", "d6237f041586694295"}, {"__cf_bm", "TlOng/xyqckk-TMen38z+0RFYA7YA="} + }; + var expectedCookieHeader = "__cfduid=d6237f041586694295; __cf_bm=TlOng/xyqckk-TMen38z+0RFYA7YA="; + CollectionAssert.AreEqual(expectedCookieHeader, CookieUtil.CookieDictionaryToHeader(cookieDictionary)); + } + + [Test] + public void CookieDictionaryToHeaderMalformed1() + { + // malformed key + var cookieDictionary = new Dictionary + { + {"__cf_=bm", "34234234"} + }; + var ex = Assert.Throws(() => CookieUtil.CookieDictionaryToHeader(cookieDictionary)); + Assert.AreEqual( "The cookie '__cf_=bm=34234234' is malformed.", ex.Message); + } + + [Test] + public void CookieDictionaryToHeaderMalformed2() + { + // malformed value + var cookieDictionary = new Dictionary + { + {"__cf_bm", "34234 234"} + }; + var ex = Assert.Throws(() => CookieUtil.CookieDictionaryToHeader(cookieDictionary)); + Assert.AreEqual( "The cookie '__cf_bm=34234 234' is malformed.", ex.Message); + } + + [Test] + public void CookieDictionaryToHeaderNull() + { + // null cookie dictionary + var expectedCookieHeader = ""; + CollectionAssert.AreEqual(expectedCookieHeader, CookieUtil.CookieDictionaryToHeader(null)); + } + } +}