Convert Abnormal to AngleSharp (#7403)

This commit is contained in:
Diego Heras
2020-03-01 02:46:34 +01:00
committed by GitHub
parent a3c1f3eb52
commit c1b2328bbf

View File

@@ -9,7 +9,9 @@ using System.Reflection;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using CsQuery; using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Jackett.Common.Models; using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig.Bespoke; using Jackett.Common.Models.IndexerConfig.Bespoke;
using Jackett.Common.Services.Interfaces; using Jackett.Common.Services.Interfaces;
@@ -39,7 +41,6 @@ namespace Jackett.Common.Indexers
private static string Directory => Path.Combine(Path.GetTempPath(), Assembly.GetExecutingAssembly().GetName().Name.ToLower(), MethodBase.GetCurrentMethod().DeclaringType?.Name.ToLower()); private static string Directory => Path.Combine(Path.GetTempPath(), Assembly.GetExecutingAssembly().GetName().Name.ToLower(), MethodBase.GetCurrentMethod().DeclaringType?.Name.ToLower());
private readonly Dictionary<string, string> emulatedBrowserHeaders = new Dictionary<string, string>(); private readonly Dictionary<string, string> emulatedBrowserHeaders = new Dictionary<string, string>();
private CQ fDom = null;
private ConfigurationDataAbnormal ConfigData private ConfigurationDataAbnormal ConfigData
{ {
@@ -170,11 +171,12 @@ namespace Jackett.Common.Indexers
await ConfigureIfOK(response.Cookies, response.Cookies.Contains("session="), () => await ConfigureIfOK(response.Cookies, response.Cookies.Contains("session="), () =>
{ {
// Parse error page // Parse error page
CQ dom = response.Content; var parser = new HtmlParser();
var message = dom[".warning"].Text().Split('.').Reverse().Skip(1).First(); var dom = parser.ParseDocument(response.Content);
var message = dom.QuerySelector(".warning").TextContent.Split('.').Reverse().Skip(1).First();
// Try left // Try left
var left = dom[".info"].Text().Trim(); var left = dom.QuerySelector(".info").TextContent.Trim();
// Oops, unable to login // Oops, unable to login
output("-> Login failed: \"" + message + "\" and " + left + " tries left before being banned for 6 hours !", "error"); output("-> Login failed: \"" + message + "\" and " + left + " tries left before being banned for 6 hours !", "error");
@@ -201,7 +203,7 @@ namespace Jackett.Common.Indexers
var FranceTz = TimeZoneInfo.CreateCustomTimeZone("W. Europe Standard Time", new TimeSpan(1, 0, 0), "(GMT+01:00) W. Europe Standard Time", "W. Europe Standard Time", "W. Europe DST Time", adjustments); var FranceTz = TimeZoneInfo.CreateCustomTimeZone("W. Europe Standard Time", new TimeSpan(1, 0, 0), "(GMT+01:00) W. Europe Standard Time", "W. Europe Standard Time", "W. Europe DST Time", adjustments);
var releases = new List<ReleaseInfo>(); var releases = new List<ReleaseInfo>();
var torrentRowList = new List<CQ>(); var qRowList = new List<IElement>();
var searchTerm = query.GetQueryString(); var searchTerm = query.GetQueryString();
var searchUrl = SearchUrl; var searchUrl = SearchUrl;
var nbResults = 0; var nbResults = 0;
@@ -226,27 +228,26 @@ namespace Jackett.Common.Indexers
var request = buildQuery(searchTerm, query, searchUrl); var request = buildQuery(searchTerm, query, searchUrl);
// Getting results & Store content // Getting results & Store content
fDom = await queryExec(request); var parser = new HtmlParser();
var dom = parser.ParseDocument(await queryExec(request));
try try
{ {
// Find torrent rows // Find torrent rows
var firstPageRows = findTorrentRows(); var firstPageRows = findTorrentRows(dom);
// Add them to torrents list // Add them to torrents list
torrentRowList.AddRange(firstPageRows.Select(fRow => fRow.Cq())); qRowList.AddRange(firstPageRows);
// Check if there are pagination links at bottom // Check if there are pagination links at bottom
var pagination = (fDom[".linkbox > a"].Length != 0); var qPagination = dom.QuerySelectorAll(".linkbox > a");
if (qPagination.Length > 0)
// If pagination available
if (pagination)
{ {
// Calculate numbers of pages available for this search query (Based on number results and number of torrents on first page) // Calculate numbers of pages available for this search query (Based on number results and number of torrents on first page)
pageLinkCount = ParseUtil.CoerceInt(Regex.Match(fDom[".linkbox > a"].Last().Attr("href").ToString(), @"\d+").Value); pageLinkCount = ParseUtil.CoerceInt(Regex.Match(qPagination.Last().GetAttribute("href").ToString(), @"\d+").Value);
// Calculate average number of results (based on torrents rows lenght on first page) // Calculate average number of results (based on torrents rows lenght on first page)
nbResults = firstPageRows.Count() * pageLinkCount; nbResults = firstPageRows.Length * pageLinkCount;
} }
else else
{ {
@@ -254,7 +255,7 @@ namespace Jackett.Common.Indexers
if (firstPageRows.Length >= 1) if (firstPageRows.Length >= 1)
{ {
// Retrieve total count on our alone page // Retrieve total count on our alone page
nbResults = firstPageRows.Count(); nbResults = firstPageRows.Length;
pageLinkCount = 1; pageLinkCount = 1;
} }
else else
@@ -282,27 +283,28 @@ namespace Jackett.Common.Indexers
var pageRequest = buildQuery(searchTerm, query, searchUrl, i); var pageRequest = buildQuery(searchTerm, query, searchUrl, i);
// Getting results & Store content // Getting results & Store content
fDom = await queryExec(pageRequest); parser = new HtmlParser();
dom = parser.ParseDocument(await queryExec(pageRequest));
// Process page results // Process page results
var additionalPageRows = findTorrentRows(); var additionalPageRows = findTorrentRows(dom);
// Add them to torrents list // Add them to torrents list
torrentRowList.AddRange(additionalPageRows.Select(fRow => fRow.Cq())); qRowList.AddRange(additionalPageRows);
} }
} }
// Loop on results // Loop on results
foreach (var tRow in torrentRowList) foreach (var row in qRowList)
{ {
output("\n=>> Torrent #" + (releases.Count + 1)); output("\n=>> Torrent #" + (releases.Count + 1));
// ID // ID
var id = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(1) > a").Attr("href").ToString(), @"\d+").Value); var id = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(2) > a").GetAttribute("href"), @"\d+").Value);
output("ID: " + id); output("ID: " + id);
// Release Name // Release Name
var name = tRow.Find("td:eq(1) > a").Text(); var name = row.QuerySelector("td:nth-of-type(2) > a").TextContent;
//issue #3847 replace multi keyword //issue #3847 replace multi keyword
if (!string.IsNullOrEmpty(ReplaceMulti)) if (!string.IsNullOrEmpty(ReplaceMulti))
{ {
@@ -312,29 +314,29 @@ namespace Jackett.Common.Indexers
output("Release: " + name); output("Release: " + name);
// Category // Category
var categoryID = tRow.Find("td:eq(0) > a").Attr("href").Replace("torrents.php?cat[]=", string.Empty); var categoryId = row.QuerySelector("td:nth-of-type(1) > a").GetAttribute("href").Replace("torrents.php?cat[]=", string.Empty);
var newznab = MapTrackerCatToNewznab(categoryID); var newznab = MapTrackerCatToNewznab(categoryId);
output("Category: " + MapTrackerCatToNewznab(categoryID).First().ToString() + " (" + categoryID + ")"); output("Category: " + MapTrackerCatToNewznab(categoryId).First().ToString() + " (" + categoryId + ")");
// Seeders // Seeders
var seeders = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(5)").Text(), @"\d+").Value); var seeders = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(6)").TextContent, @"\d+").Value);
output("Seeders: " + seeders); output("Seeders: " + seeders);
// Leechers // Leechers
var leechers = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(6)").Text(), @"\d+").Value); var leechers = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(7)").TextContent, @"\d+").Value);
output("Leechers: " + leechers); output("Leechers: " + leechers);
// Completed // Completed
var completed = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(5)").Text(), @"\d+").Value); var completed = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(6)").TextContent, @"\d+").Value);
output("Completed: " + completed); output("Completed: " + completed);
// Size // Size
var sizeStr = tRow.Find("td:eq(4)").Text().Replace("Go", "gb").Replace("Mo", "mb").Replace("Ko", "kb"); var sizeStr = row.QuerySelector("td:nth-of-type(5)").TextContent.Replace("Go", "gb").Replace("Mo", "mb").Replace("Ko", "kb");
var size = ReleaseInfo.GetBytes(sizeStr); var size = ReleaseInfo.GetBytes(sizeStr);
output("Size: " + sizeStr + " (" + size + " bytes)"); output("Size: " + sizeStr + " (" + size + " bytes)");
// Publish DateToString // Publish DateToString
var datestr = tRow.Find("span.time").Attr("title"); var datestr = row.QuerySelector("span.time").GetAttribute("title");
var dateLocal = DateTime.SpecifyKind(DateTime.ParseExact(datestr, "MMM dd yyyy, HH:mm", CultureInfo.InvariantCulture), DateTimeKind.Unspecified); var dateLocal = DateTime.SpecifyKind(DateTime.ParseExact(datestr, "MMM dd yyyy, HH:mm", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
var date = TimeZoneInfo.ConvertTimeToUtc(dateLocal, FranceTz); var date = TimeZoneInfo.ConvertTimeToUtc(dateLocal, FranceTz);
output("Released on: " + date); output("Released on: " + date);
@@ -349,7 +351,7 @@ namespace Jackett.Common.Indexers
// Torrent Download URL // Torrent Download URL
Uri downloadLink = null; Uri downloadLink = null;
var link = tRow.Find("td:eq(3) > a").Attr("href"); var link = row.QuerySelector("td:nth-of-type(4) > a").GetAttribute("href");
if (!string.IsNullOrEmpty(link)) if (!string.IsNullOrEmpty(link))
{ {
// Download link available // Download link available
@@ -365,16 +367,16 @@ namespace Jackett.Common.Indexers
// Freeleech // Freeleech
var downloadVolumeFactor = 1; var downloadVolumeFactor = 1;
if (tRow.Find("img[alt=\"Freeleech\"]").Length >= 1) if (row.QuerySelector("img[alt=\"Freeleech\"]") != null)
{ {
downloadVolumeFactor = 0; downloadVolumeFactor = 0;
output("FreeLeech =)"); output("FreeLeech =)");
} }
// Building release infos // Building release infos
var release = new ReleaseInfo() var release = new ReleaseInfo
{ {
Category = MapTrackerCatToNewznab(categoryID.ToString()), Category = MapTrackerCatToNewznab(categoryId),
Title = name, Title = name,
Seeders = seeders, Seeders = seeders,
Peers = seeders + leechers, Peers = seeders + leechers,
@@ -632,9 +634,9 @@ namespace Jackett.Common.Indexers
/// <summary> /// <summary>
/// Find torrent rows in search pages /// Find torrent rows in search pages
/// </summary> /// </summary>
/// <returns>JQuery Object</returns> /// <returns>List of rows</returns>
// Return all occurences of torrents found private IHtmlCollection<IElement> findTorrentRows(IHtmlDocument dom) =>
private CQ findTorrentRows() => fDom[".torrent_table > tbody > tr"].Not(".colhead"); dom.QuerySelectorAll(".torrent_table > tbody > tr:not(.colhead)");
/// <summary> /// <summary>
/// Output message for logging or developpment (console) /// Output message for logging or developpment (console)