Convert Shazbat to AngleSharp (#7400)

This commit is contained in:
Cory
2020-02-29 17:27:09 -06:00
committed by GitHub
parent fd8823f6bd
commit f238e78337

View File

@@ -4,7 +4,7 @@ using System.Linq;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using CsQuery; using AngleSharp.Html.Parser;
using Jackett.Common.Models; using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig; using Jackett.Common.Models.IndexerConfig;
using Jackett.Common.Services.Interfaces; using Jackett.Common.Services.Interfaces;
@@ -66,8 +66,9 @@ namespace Jackett.Common.Indexers
() => throw new ExceptionWithConfigData("The username and password entered do not match.", configData)); () => throw new ExceptionWithConfigData("The username and password entered do not match.", configData));
var rssProfile = await RequestStringWithCookiesAndRetry(RSSProfile); var rssProfile = await RequestStringWithCookiesAndRetry(RSSProfile);
CQ rssDom = rssProfile.Content; var parser = new HtmlParser();
configData.RSSKey.Value = rssDom.Find(".col-sm-9:eq(0)").Text().Trim(); var rssDom = parser.ParseDocument(rssProfile.Content);
configData.RSSKey.Value = rssDom.QuerySelector(".col-sm-9:nth-of-type(1)").TextContent.Trim();
if (string.IsNullOrWhiteSpace(configData.RSSKey.Value)) if (string.IsNullOrWhiteSpace(configData.RSSKey.Value))
{ {
throw new ExceptionWithConfigData("Failed to find RSS key.", configData); throw new ExceptionWithConfigData("Failed to find RSS key.", configData);
@@ -105,8 +106,9 @@ namespace Jackett.Common.Indexers
results = await PostDataWithCookiesAndRetry(SearchUrl, pairs, null, TorrentsUrl); results = await PostDataWithCookiesAndRetry(SearchUrl, pairs, null, TorrentsUrl);
results = await ReloginIfNecessary(results); results = await ReloginIfNecessary(results);
CQ dom = results.Content; var parser = new HtmlParser();
var shows = dom.Find("div.show[data-id]"); var dom = parser.ParseDocument(results.Content);
var shows = dom.QuerySelectorAll("div.show[data-id]");
foreach (var show in shows) foreach (var show in shows)
{ {
var showUrl = ShowUrl + show.GetAttribute("data-id"); var showUrl = ShowUrl + show.GetAttribute("data-id");
@@ -125,44 +127,42 @@ namespace Jackett.Common.Indexers
results = await RequestStringWithCookies(searchUrl); results = await RequestStringWithCookies(searchUrl);
results = await ReloginIfNecessary(results); results = await ReloginIfNecessary(results);
CQ dom = results.Content; var parser = new HtmlParser();
var rows = dom["#torrent-table tr"]; var dom = parser.ParseDocument(results.Content);
var rows = dom.QuerySelectorAll(
string.IsNullOrWhiteSpace(queryString) ? "#torrent-table tr" : "table tr");
if (!string.IsNullOrWhiteSpace(queryString)) var globalFreeleech =
{ dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null;
rows = dom["table tr"];
}
var globalFreeleech = dom.Find("span:contains(\"Freeleech until:\"):has(span.datetime)").Any();
foreach (var row in rows.Skip(1)) foreach (var row in rows.Skip(1))
{ {
var release = new ReleaseInfo(); var release = new ReleaseInfo();
var qRow = row.Cq(); var titleRow = row.QuerySelector("td:nth-of-type(3)");
var titleRow = qRow.Find("td:eq(2)").First(); foreach (var child in titleRow.Children)
titleRow.Children().Remove(); child.Remove();
release.Title = titleRow.Text().Trim(); release.Title = titleRow.TextContent.Trim();
if ((query.ImdbID == null || !TorznabCaps.SupportsImdbMovieSearch) && !query.MatchQueryStringAND(release.Title)) if ((query.ImdbID == null || !TorznabCaps.SupportsImdbMovieSearch) && !query.MatchQueryStringAND(release.Title))
continue; continue;
var qBanner = qRow.Find("div[style^=\"cursor: pointer; background-image:url\"]"); var qBanner = row.QuerySelector("div[style^=\"cursor: pointer; background-image:url\"]");
var qBannerStyle = qBanner.Attr("style"); var qBannerStyle = qBanner.GetAttribute("style");
if (!string.IsNullOrEmpty(qBannerStyle)) if (!string.IsNullOrEmpty(qBannerStyle))
{ {
var bannerImg = Regex.Match(qBannerStyle, @"url\('(.*?)'\);").Groups[1].Value; var bannerImg = Regex.Match(qBannerStyle, @"url\('(.*?)'\);").Groups[1].Value;
release.BannerUrl = new Uri(SiteLink + bannerImg); release.BannerUrl = new Uri(SiteLink + bannerImg);
} }
var qLink = row.Cq().Find("td:eq(4) a:eq(0)"); var qLink = row.QuerySelector("td:nth-of-type(5) a:nth-of-type(1)");
release.Link = new Uri(SiteLink + qLink.Attr("href")); release.Link = new Uri(SiteLink + qLink.GetAttribute("href"));
release.Guid = release.Link; release.Guid = release.Link;
var qLinkComm = row.Cq().Find("td:eq(4) a:eq(1)"); var qLinkComm = row.QuerySelector("td:nth-of-type(5) a:nth-of-type(2)");
release.Comments = new Uri(SiteLink + qLinkComm.Attr("href")); release.Comments = new Uri(SiteLink + qLinkComm.GetAttribute("href"));
var dateString = qRow.Find(".datetime").Attr("data-timestamp"); var dateString = row.QuerySelector(".datetime").GetAttribute("data-timestamp");
if (dateString != null) if (dateString != null)
release.PublishDate = DateTimeUtil.UnixTimestampToDateTime(ParseUtil.CoerceDouble(dateString)).ToLocalTime(); release.PublishDate = DateTimeUtil.UnixTimestampToDateTime(ParseUtil.CoerceDouble(dateString)).ToLocalTime();
var infoString = row.Cq().Find("td:eq(3)").Text(); var infoString = row.QuerySelector("td:nth-of-type(4)").TextContent;
release.Size = ParseUtil.CoerceLong(Regex.Match(infoString, "\\((\\d+)\\)").Value.Replace("(", "").Replace(")", "")); release.Size = ParseUtil.CoerceLong(Regex.Match(infoString, "\\((\\d+)\\)").Value.Replace("(", "").Replace(")", ""));
@@ -177,7 +177,7 @@ namespace Jackett.Common.Indexers
release.UploadVolumeFactor = 1; release.UploadVolumeFactor = 1;
// var tags = row.Cq().Find(".label-tag").Text(); These don't seem to parse - bad tags? // var tags = row.QuerySelector(".label-tag").TextContent; These don't seem to parse - bad tags?
releases.Add(release); releases.Add(release);
} }