Convert Nordicbits to AngleSharp (#7401)

This commit is contained in:
Diego Heras
2020-03-01 07:45:17 +01:00
committed by GitHub
parent db669b31a1
commit 35f9e0fbe0

View File

@@ -7,7 +7,9 @@ using System.Reflection;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using CsQuery; using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Jackett.Common.Helpers; using Jackett.Common.Helpers;
using Jackett.Common.Models; using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig.Bespoke; using Jackett.Common.Models.IndexerConfig.Bespoke;
@@ -37,7 +39,7 @@ namespace Jackett.Common.Indexers
private static string Directory => Path.Combine(Path.GetTempPath(), "Jackett", MethodBase.GetCurrentMethod().DeclaringType?.Name); private static string Directory => Path.Combine(Path.GetTempPath(), "Jackett", MethodBase.GetCurrentMethod().DeclaringType?.Name);
private readonly Dictionary<string, string> _emulatedBrowserHeaders = new Dictionary<string, string>(); private readonly Dictionary<string, string> _emulatedBrowserHeaders = new Dictionary<string, string>();
private CQ _fDom;
private ConfigurationDataNordicbits ConfigData => (ConfigurationDataNordicbits)configData; private ConfigurationDataNordicbits ConfigData => (ConfigurationDataNordicbits)configData;
public Nordicbits(IIndexerConfigurationService configService, Utils.Clients.WebClient w, Logger l, IProtectionService ps) public Nordicbits(IIndexerConfigurationService configService, Utils.Clients.WebClient w, Logger l, IProtectionService ps)
@@ -68,6 +70,7 @@ namespace Jackett.Common.Indexers
// Books // Books
AddCategoryMapping("cat=54", TorznabCatType.AudioAudiobook, "Books - Audiobooks"); AddCategoryMapping("cat=54", TorznabCatType.AudioAudiobook, "Books - Audiobooks");
AddCategoryMapping("cat=9", TorznabCatType.BooksEbook, "Books - E-Books"); AddCategoryMapping("cat=9", TorznabCatType.BooksEbook, "Books - E-Books");
AddCategoryMapping("cat=84", TorznabCatType.BooksEbook, "Books - Education");
// Games // Games
AddCategoryMapping("cat=24", TorznabCatType.PCGames, "Games - PC"); AddCategoryMapping("cat=24", TorznabCatType.PCGames, "Games - PC");
@@ -91,6 +94,7 @@ namespace Jackett.Common.Indexers
AddCategoryMapping("cat=10", TorznabCatType.MoviesSD, "Movies - SD"); AddCategoryMapping("cat=10", TorznabCatType.MoviesSD, "Movies - SD");
AddCategoryMapping("cat=23", TorznabCatType.MoviesSD, "Movies - MP4 Tablet"); AddCategoryMapping("cat=23", TorznabCatType.MoviesSD, "Movies - MP4 Tablet");
AddCategoryMapping("cat=65", TorznabCatType.XXX, "Movies - Porn"); AddCategoryMapping("cat=65", TorznabCatType.XXX, "Movies - Porn");
AddCategoryMapping("cat=90", TorznabCatType.MoviesHD, "Movies - No Nordic Subs");
// Music // Music
AddCategoryMapping("cat=28", TorznabCatType.AudioLossless, "Music - FLAC"); AddCategoryMapping("cat=28", TorznabCatType.AudioLossless, "Music - FLAC");
@@ -104,6 +108,27 @@ namespace Jackett.Common.Indexers
AddCategoryMapping("cat=48", TorznabCatType.TVUHD, "TV - HD-4K/2160p"); AddCategoryMapping("cat=48", TorznabCatType.TVUHD, "TV - HD-4K/2160p");
AddCategoryMapping("cat=57", TorznabCatType.TVUHD, "TV - HD-4K/2160p Boxset"); AddCategoryMapping("cat=57", TorznabCatType.TVUHD, "TV - HD-4K/2160p Boxset");
AddCategoryMapping("cat=11", TorznabCatType.TVSD, "TV - Boxset"); AddCategoryMapping("cat=11", TorznabCatType.TVSD, "TV - Boxset");
AddCategoryMapping("cat=80", TorznabCatType.TVSD, "TV - Danish Boxset");
AddCategoryMapping("cat=71", TorznabCatType.TVHD, "TV - Danish HD-1080p");
AddCategoryMapping("cat=72", TorznabCatType.TVHD, "TV - Danish HD-720p");
AddCategoryMapping("cat=73", TorznabCatType.TVSD, "TV - Danish SD");
AddCategoryMapping("cat=83", TorznabCatType.TVSD, "TV - Finnish Boxset");
AddCategoryMapping("cat=77", TorznabCatType.TVHD, "TV - Finnish HD-1080p");
AddCategoryMapping("cat=78", TorznabCatType.TVHD, "TV - Finnish HD-720p");
AddCategoryMapping("cat=79", TorznabCatType.TVSD, "TV - Finnish SD");
AddCategoryMapping("cat=89", TorznabCatType.TVSD, "TV - Nordic Boxset");
AddCategoryMapping("cat=86", TorznabCatType.TVHD, "TV - Nordic HD-1080p");
AddCategoryMapping("cat=87", TorznabCatType.TVHD, "TV - Nordic HD-720p");
AddCategoryMapping("cat=88", TorznabCatType.TVSD, "TV - Nordic SD");
AddCategoryMapping("cat=82", TorznabCatType.TVSD, "TV - Norwegian Boxset");
AddCategoryMapping("cat=74", TorznabCatType.TVHD, "TV - Norwegian HD-1080p");
AddCategoryMapping("cat=75", TorznabCatType.TVHD, "TV - Norwegian HD-720p");
AddCategoryMapping("cat=76", TorznabCatType.TVSD, "TV - Norwegian SD");
AddCategoryMapping("cat=81", TorznabCatType.TVSD, "TV - Swedish Boxset");
AddCategoryMapping("cat=68", TorznabCatType.TVHD, "TV - Swedish HD-1080p");
AddCategoryMapping("cat=69", TorznabCatType.TVHD, "TV - Swedish HD-720p");
AddCategoryMapping("cat=70", TorznabCatType.TVSD, "TV - Swedish SD");
AddCategoryMapping("cat=91", TorznabCatType.TVHD, "TV - No Nordic Subs");
AddCategoryMapping("cat=7", TorznabCatType.TVHD, "TV - HD-1080p"); AddCategoryMapping("cat=7", TorznabCatType.TVHD, "TV - HD-1080p");
AddCategoryMapping("cat=31", TorznabCatType.TVHD, "TV - HD-1080p Boxset"); AddCategoryMapping("cat=31", TorznabCatType.TVHD, "TV - HD-1080p Boxset");
AddCategoryMapping("cat=30", TorznabCatType.TVHD, "TV - HD-720p"); AddCategoryMapping("cat=30", TorznabCatType.TVHD, "TV - HD-720p");
@@ -257,7 +282,6 @@ namespace Jackett.Common.Indexers
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query) protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{ {
var releases = new List<ReleaseInfo>(); var releases = new List<ReleaseInfo>();
var torrentRowList = new List<CQ>();
var exactSearchTerm = query.GetQueryString(); var exactSearchTerm = query.GetQueryString();
var searchUrl = SearchUrl; var searchUrl = SearchUrl;
@@ -293,38 +317,29 @@ namespace Jackett.Common.Indexers
// Getting results & Store content // Getting results & Store content
var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value); var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value);
_fDom = response.Content; var parser = new HtmlParser();
var dom = parser.ParseDocument(response.Content);
try try
{ {
var firstPageRows = FindTorrentRows(); var firstPageRows = FindTorrentRows(dom);
// Add them to torrents list
torrentRowList.AddRange(firstPageRows.Select(fRow => fRow.Cq()));
// If pagination available // If pagination available
int nbResults; int nbResults;
int pageLinkCount; int pageLinkCount;
nbResults = 1;
pageLinkCount = 1; pageLinkCount = 1;
// Check if we have a minimum of one result // Check if we have a minimum of one result
if (firstPageRows.Length > 1) if (firstPageRows?.Length > 1)
{ {
// Retrieve total count on our alone page // Retrieve total count on our alone page
nbResults = firstPageRows.Count(); nbResults = firstPageRows.Length;
} }
else else
{ {
// Check if no result // No result found for this query
if (torrentRowList.Count == 0) Output("\nNo result found for your query, please try another search term or change the theme you're currently using on the site as this is an unsupported solution...\n", "info");
{ break;
// No results found
Output("\nNo result found for your query, please try another search term or change the theme you're currently using on the site as this is an unsupported solution...\n", "info");
// No result found for this query
break;
}
} }
Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !"); Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !");
@@ -332,47 +347,47 @@ namespace Jackett.Common.Indexers
// Loop on results // Loop on results
foreach (var tRow in torrentRowList.Skip(1).Take(torrentRowList.Count - 2)) foreach (var row in firstPageRows.Skip(1).Take(firstPageRows.Length - 2))
{ {
Output("Torrent #" + (releases.Count + 1)); Output("Torrent #" + (releases.Count + 1));
// ID // ID
var idOrig = tRow.Find("td:eq(1) > a:eq(0)").Attr("href").Split('=')[1]; var idOrig = row.QuerySelector("td:nth-of-type(2) > a:nth-of-type(1)").GetAttribute("href").Split('=')[1];
var id = idOrig.Substring(0, idOrig.Length - 4); var id = idOrig.Substring(0, idOrig.Length - 4);
Output("ID: " + id); Output("ID: " + id);
// Release Name // Release Name
var name = tRow.Find("td:eq(1) > a:eq(0)").Text(); var name = row.QuerySelector("td:nth-of-type(2) > a:nth-of-type(1)").TextContent;
// Category // Category
var categoryID = tRow.Find("td:eq(0) > a:eq(0)").Attr("href").Split('?').Last(); var categoryId = row.QuerySelector("td:nth-of-type(1) > a:nth-of-type(1)").GetAttribute("href").Split('?').Last();
var newznab = MapTrackerCatToNewznab(categoryID); var newznab = MapTrackerCatToNewznab(categoryId);
Output("Category: " + (newznab.Count > 0 ? newznab.First().ToString() : "unknown category") + " (" + categoryID + ")"); Output("Category: " + (newznab.Count > 0 ? newznab.First().ToString() : "unknown category") + " (" + categoryId + ")");
// Seeders // Seeders
var seeders = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(9)").Text(), @"\d+").Value); var seeders = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(10)").TextContent, @"\d+").Value);
Output("Seeders: " + seeders); Output("Seeders: " + seeders);
// Leechers // Leechers
var leechers = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(10)").Text(), @"\d+").Value); var leechers = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(11)").TextContent, @"\d+").Value);
Output("Leechers: " + leechers); Output("Leechers: " + leechers);
// Files // Files
var files = 1; var files = 1;
files = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(4)").Text(), @"\d+").Value); files = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(5)").TextContent, @"\d+").Value);
Output("Files: " + files); Output("Files: " + files);
// Completed // Completed
var completed = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(8)").Text(), @"\d+").Value); var completed = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(9)").TextContent, @"\d+").Value);
Output("Completed: " + completed); Output("Completed: " + completed);
// Size // Size
var humanSize = tRow.Find("td:eq(7)").Text().ToLowerInvariant(); var humanSize = row.QuerySelector("td:nth-of-type(8)").TextContent.ToLowerInvariant();
var size = ReleaseInfo.GetBytes(humanSize); var size = ReleaseInfo.GetBytes(humanSize);
Output("Size: " + humanSize + " (" + size + " bytes)"); Output("Size: " + humanSize + " (" + size + " bytes)");
// Publish DateToString // Publish DateToString
var dateTimeOrig = tRow.Find("td:eq(6)").Text(); var dateTimeOrig = row.QuerySelector("td:nth-of-type(7)").TextContent;
var datestr = Regex.Replace(dateTimeOrig, @"<[^>]+>|&nbsp;", "").Trim(); var datestr = Regex.Replace(dateTimeOrig, @"<[^>]+>|&nbsp;", "").Trim();
datestr = Regex.Replace(datestr, "Today", DateTime.Now.ToString("MMM dd yyyy"), RegexOptions.IgnoreCase); datestr = Regex.Replace(datestr, "Today", DateTime.Now.ToString("MMM dd yyyy"), RegexOptions.IgnoreCase);
datestr = Regex.Replace(datestr, "Yesterday", DateTime.Now.Date.AddDays(-1).ToString("MMM dd yyyy"), RegexOptions.IgnoreCase); datestr = Regex.Replace(datestr, "Yesterday", DateTime.Now.Date.AddDays(-1).ToString("MMM dd yyyy"), RegexOptions.IgnoreCase);
@@ -388,7 +403,7 @@ namespace Jackett.Common.Indexers
Output("Comments Link: " + commentsLink.AbsoluteUri); Output("Comments Link: " + commentsLink.AbsoluteUri);
// Torrent Download URL // Torrent Download URL
var passkey = tRow.Find("td:eq(2) > a:eq(0)").Attr("href"); var passkey = row.QuerySelector("td:nth-of-type(3) > a:nth-of-type(1)").GetAttribute("href");
var key = Regex.Match(passkey, "(?<=torrent_pass\\=)([a-zA-z0-9]*)"); var key = Regex.Match(passkey, "(?<=torrent_pass\\=)([a-zA-z0-9]*)");
var downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString())); var downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString()));
Output("Download Link: " + downloadLink.AbsoluteUri); Output("Download Link: " + downloadLink.AbsoluteUri);
@@ -412,14 +427,14 @@ namespace Jackett.Common.Indexers
}; };
// IMDB // IMDB
var imdbLink = tRow.Find("a[href*=\"imdb.com/title/tt\"]").First().Attr("href"); var imdbLink = row.QuerySelector("a[href*=\"imdb.com/title/tt\"]")?.GetAttribute("href");
release.Imdb = ParseUtil.GetLongFromString(imdbLink); release.Imdb = ParseUtil.GetLongFromString(imdbLink);
if (tRow.Find("img[title=\"Free Torrent\"]").Length >= 1) if (row.QuerySelector("img[title=\"Free Torrent\"]") != null)
release.DownloadVolumeFactor = 0; release.DownloadVolumeFactor = 0;
else if (tRow.Find("img[title=\"Halfleech\"]").Length >= 1) else if (row.QuerySelector("img[title=\"Halfleech\"]") != null)
release.DownloadVolumeFactor = 0.5; release.DownloadVolumeFactor = 0.5;
else if (tRow.Find("img[title=\"90% Freeleech\"]").Length >= 1) else if (row.QuerySelector("img[title=\"90% Freeleech\"]") != null)
release.DownloadVolumeFactor = 0.1; release.DownloadVolumeFactor = 0.1;
else else
release.DownloadVolumeFactor = 1; release.DownloadVolumeFactor = 1;
@@ -481,7 +496,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats5[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats5[]=");
} }
// Books // Books
if (new[] { "54", "9" }.Any(c => categoriesList[i].Contains(categoriesList[i]))) if (new[] { "54", "9", "84" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{ {
categoriesList[i] = categoriesList[i].Replace("cat=", "cats6[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats6[]=");
} }
@@ -491,7 +506,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats3[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats3[]=");
} }
// Movies // Movies
if (new[] { "35", "42", "47", "15", "58", "16", "6", "21", "19", "22", "20", "25", "10", "23", "65" }.Any(c => categoriesList[i].Contains(categoriesList[i]))) if (new[] { "35", "42", "47", "15", "58", "16", "6", "21", "19", "22", "20", "25", "10", "23", "65", "90" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{ {
categoriesList[i] = categoriesList[i].Replace("cat=", "cats1[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats1[]=");
} }
@@ -501,7 +516,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats4[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats4[]=");
} }
// Series // Series
if (new[] { "48", "57", "11", "7", "31", "30", "32", "5", "66" }.Any(c => categoriesList[i].Contains(categoriesList[i]))) if (new[] { "48", "57", "11", "80", "71", "72", "73", "83", "77", "78", "79", "89", "86", "87", "88", "82", "74", "75", "76", "81", "68", "69", "70", "91", "7", "31", "30", "32", "5", "66" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{ {
categoriesList[i] = categoriesList[i].Replace("cat=", "cats2[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats2[]=");
} }
@@ -674,35 +689,32 @@ namespace Jackett.Common.Indexers
/// <summary> /// <summary>
/// Find torrent rows in search pages /// Find torrent rows in search pages
/// </summary> /// </summary>
/// <returns>JQuery Object</returns> /// <returns>List of rows</returns>
private CQ FindTorrentRows() private IHtmlCollection<IElement> FindTorrentRows(IHtmlDocument dom)
{ {
var defaultTheme = new[] { "/templates/1/", "/templates/2/", "/templates/3/", "/templates/4/", "/templates/5/", "/templates/6/", "/templates/11/", "/templates/12/" }; var defaultTheme = new[] { "/templates/1/", "/templates/2/", "/templates/3/", "/templates/4/", "/templates/5/", "/templates/6/", "/templates/11/", "/templates/12/" };
var oldV2 = new[] { "/templates/7/", "/templates/8/", "/templates/9/", "/templates/10/" }; var oldV2 = new[] { "/templates/7/", "/templates/8/", "/templates/9/", "/templates/10/" };
var xmas = new[] { "/templates/14/" }; var xmas = new[] { "/templates/14/" };
if (xmas.Any(_fDom.Document.Body.InnerHTML.Contains)) if (xmas.Any(dom.Body.InnerHtml.Contains))
{ {
// Return all occurencis of torrents found // Return all occurrences of torrents found
// $('#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody:not(:first) > tr') return dom.QuerySelectorAll("#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody")[1].QuerySelectorAll("tr");
return _fDom["#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody:not(:first) > tr"];
} }
// template 7 contains a reference to template 2 (logout button), so check for oldV2 first // template 7 contains a reference to template 2 (logout button), so check for oldV2 first
if (oldV2.Any(_fDom.Document.Body.InnerHTML.Contains)) if (oldV2.Any(dom.Body.InnerHtml.Contains))
{ {
// Return all occurencis of torrents found // Return all occurrences of torrents found
// $('#base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr:not(:first)') return dom.QuerySelectorAll("#base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr").Skip(1).ToCollection();
return _fDom["# base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr:not(:first)"];
} }
if (defaultTheme.Any(_fDom.Document.Body.InnerHTML.Contains)) if (defaultTheme.Any(dom.Body.InnerHtml.Contains))
{ {
// Return all occurencis of torrents found // Return all occurrences of torrents found
// $('#base_content2 > div.article > table > tbody:not(:first) > tr') return dom.QuerySelectorAll("#base_content2 > div.article > table > tbody")[1].QuerySelectorAll("tr");
return _fDom["# base_content2 > div.article > table > tbody:not(:first) > tr"];
} }
return _fDom; return null;
} }
/// <summary> /// <summary>