Convert Nordicbits to AngleSharp (#7401)

This commit is contained in:
Diego Heras
2020-03-01 07:45:17 +01:00
committed by GitHub
parent db669b31a1
commit 35f9e0fbe0

View File

@@ -7,7 +7,9 @@ using System.Reflection;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using CsQuery; using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Jackett.Common.Helpers; using Jackett.Common.Helpers;
using Jackett.Common.Models; using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig.Bespoke; using Jackett.Common.Models.IndexerConfig.Bespoke;
@@ -37,7 +39,7 @@ namespace Jackett.Common.Indexers
private static string Directory => Path.Combine(Path.GetTempPath(), "Jackett", MethodBase.GetCurrentMethod().DeclaringType?.Name); private static string Directory => Path.Combine(Path.GetTempPath(), "Jackett", MethodBase.GetCurrentMethod().DeclaringType?.Name);
private readonly Dictionary<string, string> _emulatedBrowserHeaders = new Dictionary<string, string>(); private readonly Dictionary<string, string> _emulatedBrowserHeaders = new Dictionary<string, string>();
private CQ _fDom;
private ConfigurationDataNordicbits ConfigData => (ConfigurationDataNordicbits)configData; private ConfigurationDataNordicbits ConfigData => (ConfigurationDataNordicbits)configData;
public Nordicbits(IIndexerConfigurationService configService, Utils.Clients.WebClient w, Logger l, IProtectionService ps) public Nordicbits(IIndexerConfigurationService configService, Utils.Clients.WebClient w, Logger l, IProtectionService ps)
@@ -68,6 +70,7 @@ namespace Jackett.Common.Indexers
// Books // Books
AddCategoryMapping("cat=54", TorznabCatType.AudioAudiobook, "Books - Audiobooks"); AddCategoryMapping("cat=54", TorznabCatType.AudioAudiobook, "Books - Audiobooks");
AddCategoryMapping("cat=9", TorznabCatType.BooksEbook, "Books - E-Books"); AddCategoryMapping("cat=9", TorznabCatType.BooksEbook, "Books - E-Books");
AddCategoryMapping("cat=84", TorznabCatType.BooksEbook, "Books - Education");
// Games // Games
AddCategoryMapping("cat=24", TorznabCatType.PCGames, "Games - PC"); AddCategoryMapping("cat=24", TorznabCatType.PCGames, "Games - PC");
@@ -91,6 +94,7 @@ namespace Jackett.Common.Indexers
AddCategoryMapping("cat=10", TorznabCatType.MoviesSD, "Movies - SD"); AddCategoryMapping("cat=10", TorznabCatType.MoviesSD, "Movies - SD");
AddCategoryMapping("cat=23", TorznabCatType.MoviesSD, "Movies - MP4 Tablet"); AddCategoryMapping("cat=23", TorznabCatType.MoviesSD, "Movies - MP4 Tablet");
AddCategoryMapping("cat=65", TorznabCatType.XXX, "Movies - Porn"); AddCategoryMapping("cat=65", TorznabCatType.XXX, "Movies - Porn");
AddCategoryMapping("cat=90", TorznabCatType.MoviesHD, "Movies - No Nordic Subs");
// Music // Music
AddCategoryMapping("cat=28", TorznabCatType.AudioLossless, "Music - FLAC"); AddCategoryMapping("cat=28", TorznabCatType.AudioLossless, "Music - FLAC");
@@ -104,6 +108,27 @@ namespace Jackett.Common.Indexers
AddCategoryMapping("cat=48", TorznabCatType.TVUHD, "TV - HD-4K/2160p"); AddCategoryMapping("cat=48", TorznabCatType.TVUHD, "TV - HD-4K/2160p");
AddCategoryMapping("cat=57", TorznabCatType.TVUHD, "TV - HD-4K/2160p Boxset"); AddCategoryMapping("cat=57", TorznabCatType.TVUHD, "TV - HD-4K/2160p Boxset");
AddCategoryMapping("cat=11", TorznabCatType.TVSD, "TV - Boxset"); AddCategoryMapping("cat=11", TorznabCatType.TVSD, "TV - Boxset");
AddCategoryMapping("cat=80", TorznabCatType.TVSD, "TV - Danish Boxset");
AddCategoryMapping("cat=71", TorznabCatType.TVHD, "TV - Danish HD-1080p");
AddCategoryMapping("cat=72", TorznabCatType.TVHD, "TV - Danish HD-720p");
AddCategoryMapping("cat=73", TorznabCatType.TVSD, "TV - Danish SD");
AddCategoryMapping("cat=83", TorznabCatType.TVSD, "TV - Finnish Boxset");
AddCategoryMapping("cat=77", TorznabCatType.TVHD, "TV - Finnish HD-1080p");
AddCategoryMapping("cat=78", TorznabCatType.TVHD, "TV - Finnish HD-720p");
AddCategoryMapping("cat=79", TorznabCatType.TVSD, "TV - Finnish SD");
AddCategoryMapping("cat=89", TorznabCatType.TVSD, "TV - Nordic Boxset");
AddCategoryMapping("cat=86", TorznabCatType.TVHD, "TV - Nordic HD-1080p");
AddCategoryMapping("cat=87", TorznabCatType.TVHD, "TV - Nordic HD-720p");
AddCategoryMapping("cat=88", TorznabCatType.TVSD, "TV - Nordic SD");
AddCategoryMapping("cat=82", TorznabCatType.TVSD, "TV - Norwegian Boxset");
AddCategoryMapping("cat=74", TorznabCatType.TVHD, "TV - Norwegian HD-1080p");
AddCategoryMapping("cat=75", TorznabCatType.TVHD, "TV - Norwegian HD-720p");
AddCategoryMapping("cat=76", TorznabCatType.TVSD, "TV - Norwegian SD");
AddCategoryMapping("cat=81", TorznabCatType.TVSD, "TV - Swedish Boxset");
AddCategoryMapping("cat=68", TorznabCatType.TVHD, "TV - Swedish HD-1080p");
AddCategoryMapping("cat=69", TorznabCatType.TVHD, "TV - Swedish HD-720p");
AddCategoryMapping("cat=70", TorznabCatType.TVSD, "TV - Swedish SD");
AddCategoryMapping("cat=91", TorznabCatType.TVHD, "TV - No Nordic Subs");
AddCategoryMapping("cat=7", TorznabCatType.TVHD, "TV - HD-1080p"); AddCategoryMapping("cat=7", TorznabCatType.TVHD, "TV - HD-1080p");
AddCategoryMapping("cat=31", TorznabCatType.TVHD, "TV - HD-1080p Boxset"); AddCategoryMapping("cat=31", TorznabCatType.TVHD, "TV - HD-1080p Boxset");
AddCategoryMapping("cat=30", TorznabCatType.TVHD, "TV - HD-720p"); AddCategoryMapping("cat=30", TorznabCatType.TVHD, "TV - HD-720p");
@@ -257,7 +282,6 @@ namespace Jackett.Common.Indexers
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query) protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{ {
var releases = new List<ReleaseInfo>(); var releases = new List<ReleaseInfo>();
var torrentRowList = new List<CQ>();
var exactSearchTerm = query.GetQueryString(); var exactSearchTerm = query.GetQueryString();
var searchUrl = SearchUrl; var searchUrl = SearchUrl;
@@ -293,86 +317,77 @@ namespace Jackett.Common.Indexers
// Getting results & Store content // Getting results & Store content
var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value); var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value);
_fDom = response.Content; var parser = new HtmlParser();
var dom = parser.ParseDocument(response.Content);
try try
{ {
var firstPageRows = FindTorrentRows(); var firstPageRows = FindTorrentRows(dom);
// Add them to torrents list
torrentRowList.AddRange(firstPageRows.Select(fRow => fRow.Cq()));
// If pagination available // If pagination available
int nbResults; int nbResults;
int pageLinkCount; int pageLinkCount;
nbResults = 1;
pageLinkCount = 1; pageLinkCount = 1;
// Check if we have a minimum of one result // Check if we have a minimum of one result
if (firstPageRows.Length > 1) if (firstPageRows?.Length > 1)
{ {
// Retrieve total count on our alone page // Retrieve total count on our alone page
nbResults = firstPageRows.Count(); nbResults = firstPageRows.Length;
} }
else else
{ {
// Check if no result
if (torrentRowList.Count == 0)
{
// No results found
Output("\nNo result found for your query, please try another search term or change the theme you're currently using on the site as this is an unsupported solution...\n", "info");
// No result found for this query // No result found for this query
Output("\nNo result found for your query, please try another search term or change the theme you're currently using on the site as this is an unsupported solution...\n", "info");
break; break;
} }
}
Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !"); Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !");
Output("\nThere are " + (firstPageRows.Length - 2) + " results on the first page !"); Output("\nThere are " + (firstPageRows.Length - 2) + " results on the first page !");
// Loop on results // Loop on results
foreach (var tRow in torrentRowList.Skip(1).Take(torrentRowList.Count - 2)) foreach (var row in firstPageRows.Skip(1).Take(firstPageRows.Length - 2))
{ {
Output("Torrent #" + (releases.Count + 1)); Output("Torrent #" + (releases.Count + 1));
// ID // ID
var idOrig = tRow.Find("td:eq(1) > a:eq(0)").Attr("href").Split('=')[1]; var idOrig = row.QuerySelector("td:nth-of-type(2) > a:nth-of-type(1)").GetAttribute("href").Split('=')[1];
var id = idOrig.Substring(0, idOrig.Length - 4); var id = idOrig.Substring(0, idOrig.Length - 4);
Output("ID: " + id); Output("ID: " + id);
// Release Name // Release Name
var name = tRow.Find("td:eq(1) > a:eq(0)").Text(); var name = row.QuerySelector("td:nth-of-type(2) > a:nth-of-type(1)").TextContent;
// Category // Category
var categoryID = tRow.Find("td:eq(0) > a:eq(0)").Attr("href").Split('?').Last(); var categoryId = row.QuerySelector("td:nth-of-type(1) > a:nth-of-type(1)").GetAttribute("href").Split('?').Last();
var newznab = MapTrackerCatToNewznab(categoryID); var newznab = MapTrackerCatToNewznab(categoryId);
Output("Category: " + (newznab.Count > 0 ? newznab.First().ToString() : "unknown category") + " (" + categoryID + ")"); Output("Category: " + (newznab.Count > 0 ? newznab.First().ToString() : "unknown category") + " (" + categoryId + ")");
// Seeders // Seeders
var seeders = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(9)").Text(), @"\d+").Value); var seeders = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(10)").TextContent, @"\d+").Value);
Output("Seeders: " + seeders); Output("Seeders: " + seeders);
// Leechers // Leechers
var leechers = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(10)").Text(), @"\d+").Value); var leechers = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(11)").TextContent, @"\d+").Value);
Output("Leechers: " + leechers); Output("Leechers: " + leechers);
// Files // Files
var files = 1; var files = 1;
files = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(4)").Text(), @"\d+").Value); files = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(5)").TextContent, @"\d+").Value);
Output("Files: " + files); Output("Files: " + files);
// Completed // Completed
var completed = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(8)").Text(), @"\d+").Value); var completed = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(9)").TextContent, @"\d+").Value);
Output("Completed: " + completed); Output("Completed: " + completed);
// Size // Size
var humanSize = tRow.Find("td:eq(7)").Text().ToLowerInvariant(); var humanSize = row.QuerySelector("td:nth-of-type(8)").TextContent.ToLowerInvariant();
var size = ReleaseInfo.GetBytes(humanSize); var size = ReleaseInfo.GetBytes(humanSize);
Output("Size: " + humanSize + " (" + size + " bytes)"); Output("Size: " + humanSize + " (" + size + " bytes)");
// Publish DateToString // Publish DateToString
var dateTimeOrig = tRow.Find("td:eq(6)").Text(); var dateTimeOrig = row.QuerySelector("td:nth-of-type(7)").TextContent;
var datestr = Regex.Replace(dateTimeOrig, @"<[^>]+>|&nbsp;", "").Trim(); var datestr = Regex.Replace(dateTimeOrig, @"<[^>]+>|&nbsp;", "").Trim();
datestr = Regex.Replace(datestr, "Today", DateTime.Now.ToString("MMM dd yyyy"), RegexOptions.IgnoreCase); datestr = Regex.Replace(datestr, "Today", DateTime.Now.ToString("MMM dd yyyy"), RegexOptions.IgnoreCase);
datestr = Regex.Replace(datestr, "Yesterday", DateTime.Now.Date.AddDays(-1).ToString("MMM dd yyyy"), RegexOptions.IgnoreCase); datestr = Regex.Replace(datestr, "Yesterday", DateTime.Now.Date.AddDays(-1).ToString("MMM dd yyyy"), RegexOptions.IgnoreCase);
@@ -388,7 +403,7 @@ namespace Jackett.Common.Indexers
Output("Comments Link: " + commentsLink.AbsoluteUri); Output("Comments Link: " + commentsLink.AbsoluteUri);
// Torrent Download URL // Torrent Download URL
var passkey = tRow.Find("td:eq(2) > a:eq(0)").Attr("href"); var passkey = row.QuerySelector("td:nth-of-type(3) > a:nth-of-type(1)").GetAttribute("href");
var key = Regex.Match(passkey, "(?<=torrent_pass\\=)([a-zA-z0-9]*)"); var key = Regex.Match(passkey, "(?<=torrent_pass\\=)([a-zA-z0-9]*)");
var downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString())); var downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString()));
Output("Download Link: " + downloadLink.AbsoluteUri); Output("Download Link: " + downloadLink.AbsoluteUri);
@@ -412,14 +427,14 @@ namespace Jackett.Common.Indexers
}; };
// IMDB // IMDB
var imdbLink = tRow.Find("a[href*=\"imdb.com/title/tt\"]").First().Attr("href"); var imdbLink = row.QuerySelector("a[href*=\"imdb.com/title/tt\"]")?.GetAttribute("href");
release.Imdb = ParseUtil.GetLongFromString(imdbLink); release.Imdb = ParseUtil.GetLongFromString(imdbLink);
if (tRow.Find("img[title=\"Free Torrent\"]").Length >= 1) if (row.QuerySelector("img[title=\"Free Torrent\"]") != null)
release.DownloadVolumeFactor = 0; release.DownloadVolumeFactor = 0;
else if (tRow.Find("img[title=\"Halfleech\"]").Length >= 1) else if (row.QuerySelector("img[title=\"Halfleech\"]") != null)
release.DownloadVolumeFactor = 0.5; release.DownloadVolumeFactor = 0.5;
else if (tRow.Find("img[title=\"90% Freeleech\"]").Length >= 1) else if (row.QuerySelector("img[title=\"90% Freeleech\"]") != null)
release.DownloadVolumeFactor = 0.1; release.DownloadVolumeFactor = 0.1;
else else
release.DownloadVolumeFactor = 1; release.DownloadVolumeFactor = 1;
@@ -481,7 +496,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats5[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats5[]=");
} }
// Books // Books
if (new[] { "54", "9" }.Any(c => categoriesList[i].Contains(categoriesList[i]))) if (new[] { "54", "9", "84" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{ {
categoriesList[i] = categoriesList[i].Replace("cat=", "cats6[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats6[]=");
} }
@@ -491,7 +506,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats3[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats3[]=");
} }
// Movies // Movies
if (new[] { "35", "42", "47", "15", "58", "16", "6", "21", "19", "22", "20", "25", "10", "23", "65" }.Any(c => categoriesList[i].Contains(categoriesList[i]))) if (new[] { "35", "42", "47", "15", "58", "16", "6", "21", "19", "22", "20", "25", "10", "23", "65", "90" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{ {
categoriesList[i] = categoriesList[i].Replace("cat=", "cats1[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats1[]=");
} }
@@ -501,7 +516,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats4[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats4[]=");
} }
// Series // Series
if (new[] { "48", "57", "11", "7", "31", "30", "32", "5", "66" }.Any(c => categoriesList[i].Contains(categoriesList[i]))) if (new[] { "48", "57", "11", "80", "71", "72", "73", "83", "77", "78", "79", "89", "86", "87", "88", "82", "74", "75", "76", "81", "68", "69", "70", "91", "7", "31", "30", "32", "5", "66" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{ {
categoriesList[i] = categoriesList[i].Replace("cat=", "cats2[]="); categoriesList[i] = categoriesList[i].Replace("cat=", "cats2[]=");
} }
@@ -674,35 +689,32 @@ namespace Jackett.Common.Indexers
/// <summary> /// <summary>
/// Find torrent rows in search pages /// Find torrent rows in search pages
/// </summary> /// </summary>
/// <returns>JQuery Object</returns> /// <returns>List of rows</returns>
private CQ FindTorrentRows() private IHtmlCollection<IElement> FindTorrentRows(IHtmlDocument dom)
{ {
var defaultTheme = new[] { "/templates/1/", "/templates/2/", "/templates/3/", "/templates/4/", "/templates/5/", "/templates/6/", "/templates/11/", "/templates/12/" }; var defaultTheme = new[] { "/templates/1/", "/templates/2/", "/templates/3/", "/templates/4/", "/templates/5/", "/templates/6/", "/templates/11/", "/templates/12/" };
var oldV2 = new[] { "/templates/7/", "/templates/8/", "/templates/9/", "/templates/10/" }; var oldV2 = new[] { "/templates/7/", "/templates/8/", "/templates/9/", "/templates/10/" };
var xmas = new[] { "/templates/14/" }; var xmas = new[] { "/templates/14/" };
if (xmas.Any(_fDom.Document.Body.InnerHTML.Contains)) if (xmas.Any(dom.Body.InnerHtml.Contains))
{ {
// Return all occurencis of torrents found // Return all occurrences of torrents found
// $('#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody:not(:first) > tr') return dom.QuerySelectorAll("#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody")[1].QuerySelectorAll("tr");
return _fDom["#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody:not(:first) > tr"];
} }
// template 7 contains a reference to template 2 (logout button), so check for oldV2 first // template 7 contains a reference to template 2 (logout button), so check for oldV2 first
if (oldV2.Any(_fDom.Document.Body.InnerHTML.Contains)) if (oldV2.Any(dom.Body.InnerHtml.Contains))
{ {
// Return all occurencis of torrents found // Return all occurrences of torrents found
// $('#base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr:not(:first)') return dom.QuerySelectorAll("#base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr").Skip(1).ToCollection();
return _fDom["# base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr:not(:first)"];
} }
if (defaultTheme.Any(_fDom.Document.Body.InnerHTML.Contains)) if (defaultTheme.Any(dom.Body.InnerHtml.Contains))
{ {
// Return all occurencis of torrents found // Return all occurrences of torrents found
// $('#base_content2 > div.article > table > tbody:not(:first) > tr') return dom.QuerySelectorAll("#base_content2 > div.article > table > tbody")[1].QuerySelectorAll("tr");
return _fDom["# base_content2 > div.article > table > tbody:not(:first) > tr"];
} }
return _fDom; return null;
} }
/// <summary> /// <summary>