diff --git a/src/Jackett.Common/Indexers/Shazbat.cs b/src/Jackett.Common/Indexers/Shazbat.cs index cc2ec637b..adf3bdd54 100644 --- a/src/Jackett.Common/Indexers/Shazbat.cs +++ b/src/Jackett.Common/Indexers/Shazbat.cs @@ -5,9 +5,10 @@ using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; +using AngleSharp.Dom; using AngleSharp.Html.Parser; using Jackett.Common.Models; -using Jackett.Common.Models.IndexerConfig; +using Jackett.Common.Models.IndexerConfig.Bespoke; using Jackett.Common.Services.Interfaces; using Jackett.Common.Utils; using Jackett.Common.Utils.Clients; @@ -22,20 +23,16 @@ namespace Jackett.Common.Indexers private string LoginUrl => SiteLink + "login"; private string SearchUrl => SiteLink + "search"; private string TorrentsUrl => SiteLink + "torrents"; - private string ShowUrl => SiteLink + "show?id="; + private string ShowUrl => SiteLink + "show"; private string RSSProfile => SiteLink + "rss_feeds"; - private new ConfigurationDataBasicLoginWithRSS configData - { - get => (ConfigurationDataBasicLoginWithRSS)base.configData; - set => base.configData = value; - } + private new ConfigurationDataShazbat configData => (ConfigurationDataShazbat)base.configData; public Shazbat(IIndexerConfigurationService configService, WebClient c, Logger l, IProtectionService ps, ICacheService cs) : base(id: "shazbat", name: "Shazbat", - description: "Modern indexer", + description: "Shazbat is a PRIVATE Torrent Tracker with highly curated TV content", link: "https://www.shazbat.tv/", caps: new TorznabCapabilities { @@ -49,141 +46,236 @@ namespace Jackett.Common.Indexers logger: l, p: ps, cacheService: cs, - configData: new ConfigurationDataBasicLoginWithRSS()) + configData: new ConfigurationDataShazbat()) { Encoding = Encoding.UTF8; Language = "en-US"; Type = "private"; + webclient.requestDelay = 5.1; + AddCategoryMapping(1, TorznabCatType.TV); AddCategoryMapping(2, TorznabCatType.TVSD); AddCategoryMapping(3, TorznabCatType.TVHD); } + private int ShowPagesFetchLimit => int.TryParse(configData.ShowPagesFetchLimit.Value, out var limit) && limit > 0 && limit <= 5 ? limit : 2; + public override async Task ApplyConfiguration(JToken configJson) { LoadValuesFromJson(configJson); + var pairs = new Dictionary { - {"referer", "login"}, - {"query", ""}, - {"tv_login", configData.Username.Value}, - {"tv_password", configData.Password.Value}, - {"email", ""} + { "referer", "" }, + { "query", "" }, + { "tv_timezone", "0" }, + { "tv_login", configData.Username.Value }, + { "tv_password", configData.Password.Value } }; // Get cookie var result = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, LoginUrl); - await ConfigureIfOK(result.Cookies, result.ContentString?.Contains("glyphicon-log-out") == true, - () => throw new ExceptionWithConfigData("The username and password entered do not match.", configData)); + await ConfigureIfOK(result.Cookies, result.ContentString?.Contains("glyphicon-log-out") == true, () => + { + throw new ExceptionWithConfigData("The username and password entered do not match.", configData); + }); + var rssProfile = await RequestWithCookiesAndRetryAsync(RSSProfile); var parser = new HtmlParser(); var rssDom = parser.ParseDocument(rssProfile.ContentString); - configData.RSSKey.Value = rssDom.QuerySelector(".col-sm-9:nth-of-type(1)").TextContent.Trim(); + + configData.RSSKey.Value = rssDom.QuerySelector(".col-sm-9:nth-of-type(1)")?.TextContent.Trim(); if (string.IsNullOrWhiteSpace(configData.RSSKey.Value)) throw new ExceptionWithConfigData("Failed to find RSS key.", configData); + SaveConfig(); + return IndexerConfigurationStatus.RequiresTesting; } protected override async Task> PerformQuery(TorznabQuery query) { + WebResult response; + var releases = new List(); - var queryString = query.GetQueryString(); - WebResult results = null; - var searchUrls = new List(); - if (!string.IsNullOrWhiteSpace(query.SanitizedSearchTerm)) + var searchUrls = new List(); + + var hasGlobalFreeleech = false; + + var searchTerm = query.SanitizedSearchTerm; + var term = FixSearchTerm(searchTerm); + + var showTorrentsHeaders = new Dictionary { - var pairs = new Dictionary + { "Content-Type", "application/x-www-form-urlencoded" }, + { "X-Requested-With", "XMLHttpRequest" }, + }; + + var showTorrentsBody = new Dictionary + { + { "portlet", "true" }, + { "tab", "true" } + }; + + if (!string.IsNullOrWhiteSpace(term)) + { + var searchBody = new Dictionary { - {"search", query.SanitizedSearchTerm} + { "search", term } }; - results = await RequestWithCookiesAndRetryAsync( - SearchUrl, null, RequestType.POST, TorrentsUrl, pairs); - results = await ReloginIfNecessary(results); + + response = await RequestWithCookiesAndRetryAsync(SearchUrl, method: RequestType.POST, referer: TorrentsUrl, data: searchBody); + response = await ReloginIfNecessaryAsync(response); + var parser = new HtmlParser(); - var dom = parser.ParseDocument(results.ContentString); + var dom = parser.ParseDocument(response.ContentString); + + hasGlobalFreeleech = dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null; + + releases.AddRange(ParseResults(response, query, searchTerm, hasGlobalFreeleech)); + var shows = dom.QuerySelectorAll("div.show[data-id]"); - foreach (var show in shows) + if (shows.Any()) { - var showUrl = ShowUrl + show.GetAttribute("data-id"); - searchUrls.Add(showUrl); - } - } - else - searchUrls.Add(TorrentsUrl); + var showPagesFetchLimit = ShowPagesFetchLimit; - try - { - foreach (var searchUrl in searchUrls) - { - results = await RequestWithCookiesAsync(searchUrl); - results = await ReloginIfNecessary(results); - var parser = new HtmlParser(); - var dom = parser.ParseDocument(results.ContentString); - var rows = dom.QuerySelectorAll( - string.IsNullOrWhiteSpace(queryString) ? "#torrent-table tr" : "table tr"); - var globalFreeleech = - dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null; - foreach (var row in rows.Skip(1)) + if (showPagesFetchLimit < 1 || showPagesFetchLimit > 5) + throw new Exception($"Value for Show Pages Fetch Limit should be between 1 and 5. Current value: {showPagesFetchLimit}."); + + if (shows.Length > showPagesFetchLimit) + logger.Debug($"Your search returned {shows.Length} shows. Use a more specific search term for more relevant results."); + + foreach (var show in shows.Take(showPagesFetchLimit)) { - // TODO switch to initializer - var release = new ReleaseInfo(); - var titleRow = row.QuerySelector("td:nth-of-type(3)"); - foreach (var child in titleRow.Children) - child.Remove(); - release.Title = titleRow.TextContent.Trim(); - if ((query.ImdbID == null || !TorznabCaps.MovieSearchImdbAvailable) && - !query.MatchQueryStringAND(release.Title)) - continue; - var posterStyle = row.QuerySelector("div[style^=\"cursor: pointer; background-image:url\"]") - ?.GetAttribute("style"); - if (!string.IsNullOrEmpty(posterStyle)) + var showTorrentsQueryParams = new Dictionary { - var posterStr = Regex.Match(posterStyle, @"url\('(.*?)'\);").Groups[1].Value; - release.Poster = new Uri(SiteLink + posterStr); - } + { "id", show.GetAttribute("data-id") }, + { "show_mode", "torrents" } + }; - var qLink = row.QuerySelector("td:nth-of-type(5) a"); - release.Link = new Uri(SiteLink + qLink.GetAttribute("href")); - release.Guid = release.Link; - var qLinkComm = row.QuerySelector("td:nth-of-type(5) a.internal"); - release.Details = new Uri(SiteLink + qLinkComm.GetAttribute("href")); - var dateString = row.QuerySelector(".datetime")?.GetAttribute("data-timestamp"); - if (dateString != null) - release.PublishDate = DateTimeUtil.UnixTimestampToDateTime(ParseUtil.CoerceDouble(dateString)); - var infoString = row.QuerySelector("td:nth-of-type(4)").TextContent; - release.Size = ParseUtil.CoerceLong( - Regex.Match(infoString, "\\((\\d+)\\)").Value.Replace("(", "").Replace(")", "")); - var infosplit = infoString.Replace("/", string.Empty).Split(":".ToCharArray()); - release.Seeders = ParseUtil.CoerceInt(infosplit[1]); - release.Peers = release.Seeders + ParseUtil.CoerceInt(infosplit[2]); - release.DownloadVolumeFactor = globalFreeleech ? 0 : 1; - release.UploadVolumeFactor = 1; - release.MinimumRatio = 1; - release.MinimumSeedTime = 172800; // 48 hours - - // var tags = row.QuerySelector(".label-tag").TextContent; These don't see to parse - bad tags? - releases.Add(release); + searchUrls.Add(new WebRequest + { + Url = $"{ShowUrl}?{showTorrentsQueryParams.GetQueryString()}", + Type = RequestType.POST, + PostData = showTorrentsBody, + Headers = showTorrentsHeaders + }); } } } - catch (Exception ex) + else + searchUrls.Add(new WebRequest { Url = TorrentsUrl, Type = RequestType.GET }); + + foreach (var searchUrl in searchUrls) { - OnParseError(results.ContentString, ex); + response = await RequestWithCookiesAsync(url: searchUrl.Url, method: searchUrl.Type, data: searchUrl.PostData, headers: searchUrl.Headers); + response = await ReloginIfNecessaryAsync(response); + + try + { + releases.AddRange(ParseResults(response, query, searchTerm, hasGlobalFreeleech)); + } + catch (Exception ex) + { + OnParseError(response.ContentString, ex); + } } - foreach (var release in releases) - release.Category = release.Title.Contains("1080p") || release.Title.Contains("720p") - ? new List { TorznabCatType.TVHD.ID } - : new List { TorznabCatType.TVSD.ID }; + return releases; } - private async Task ReloginIfNecessary(WebResult response) + private IList ParseResults(WebResult response, TorznabQuery query, string searchTerm, bool hasGlobalFreeleech = false) { - if (response.ContentString.Contains("onclick=\"document.location='logout'\"")) + var releases = new List(); + + var parser = new HtmlParser(); + var dom = parser.ParseDocument(response.ContentString); + + if (!hasGlobalFreeleech) + hasGlobalFreeleech = dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null; + + var publishDate = DateTime.Now; + + var rows = dom.QuerySelectorAll("#torrent-table tr.eprow, table tr.eprow"); + foreach (var row in rows) + { + var title = ParseTitle(row.QuerySelector("td:nth-of-type(3)")); + + if ((query.ImdbID == null || !TorznabCaps.MovieSearchImdbAvailable) && !query.MatchQueryStringAND(title, queryStringOverride: searchTerm)) + continue; + + var link = new Uri(SiteLink + row.QuerySelector("td:nth-of-type(5) a[href^=\"load_torrent?\"]")?.GetAttribute("href")); + var details = new Uri(SiteLink + row.QuerySelector("td:nth-of-type(5) [href^=\"torrent_info?\"]")?.GetAttribute("href")); + + var infoString = row.QuerySelector("td:nth-of-type(4)")?.TextContent.Trim() ?? string.Empty; + var infoRegex = new Regex(@"\((?\d+)\):(?\d+) \/ :(?\d+)$", RegexOptions.Compiled); + var matchInfo = infoRegex.Match(infoString); + var size = matchInfo.Groups["size"].Success && long.TryParse(matchInfo.Groups["size"].Value, out var outSize) ? outSize : 0; + var seeders = matchInfo.Groups["seeders"].Success && int.TryParse(matchInfo.Groups["seeders"].Value, out var outSeeders) ? outSeeders : 0; + var leechers = matchInfo.Groups["leechers"].Success && int.TryParse(matchInfo.Groups["leechers"].Value, out var outLeechers) ? outLeechers : 0; + + var dateTimestamp = row.QuerySelector(".datetime[data-timestamp]")?.GetAttribute("data-timestamp"); + publishDate = dateTimestamp != null && ParseUtil.TryCoerceDouble(dateTimestamp, out var timestamp) ? DateTimeUtil.UnixTimestampToDateTime(timestamp) : publishDate.AddMinutes(-1); + + var release = new ReleaseInfo + { + Guid = link, + Link = link, + Details = details, + Title = title, + Category = ParseCategories(title), + Size = size, + Seeders = seeders, + Peers = seeders + leechers, + PublishDate = publishDate, + Genres = row.QuerySelectorAll("label.label-tag").Select(t => t.TextContent.Trim()).ToList(), + DownloadVolumeFactor = hasGlobalFreeleech ? 0 : 1, + UploadVolumeFactor = 1, + MinimumRatio = 1, + MinimumSeedTime = 172800 // 48 hours + }; + + var posterStyle = row.QuerySelector("div[style^=\"cursor: pointer; background-image:url\"]")?.GetAttribute("style"); + if (!string.IsNullOrEmpty(posterStyle)) + { + var posterStr = Regex.Match(posterStyle, @"url\('(?.*)'\);").Groups["poster"].Value; + release.Poster = new Uri(SiteLink + posterStr); + } + + releases.Add(release); + } + + return releases; + } + + private static string ParseTitle(IElement titleRow) + { + var title = titleRow?.ChildNodes.First(n => n.NodeType == NodeType.Text && n.TextContent.Trim() != string.Empty); + + return title?.TextContent.Trim(); + } + + private static string FixSearchTerm(string term) + { + term = Regex.Replace(term, @"\b[S|E]\d+\b", string.Empty, RegexOptions.IgnoreCase); + term = Regex.Replace(term, @".+\b\d{4}(\.\d{2}\.\d{2})?\b", string.Empty); + term = Regex.Replace(term, @"[\.\s\(\)\[\]]+", " "); + + return term.ToLower().Trim(); + } + + protected virtual List ParseCategories(string title) => title.Contains("1080p") || title.Contains("1080i") || title.Contains("720p") ? new List { TorznabCatType.TVHD.ID } : new List { TorznabCatType.TVSD.ID }; + + private async Task ReloginIfNecessaryAsync(WebResult response) + { + if (response.ContentString.Contains("onclick=\"document.location='logout'\"") || + response.ContentString.Contains("show_id") || response.ContentString.Contains("Filename") || + response.ContentString.Contains("Peers") || response.ContentString.Contains("Download")) return response; + logger.Warn("Session expired. Relogin."); + await ApplyConfiguration(null); response.Request.Cookies = CookieHeader; return await webclient.GetResultAsync(response.Request); diff --git a/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataShazbat.cs b/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataShazbat.cs new file mode 100644 index 000000000..5cf8da513 --- /dev/null +++ b/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataShazbat.cs @@ -0,0 +1,31 @@ +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Newtonsoft.Json; + +namespace Jackett.Common.Models.IndexerConfig.Bespoke +{ + [ExcludeFromCodeCoverage] + internal class ConfigurationDataShazbat : ConfigurationDataBasicLoginWithRSS + { + public SingleSelectConfigurationItem ShowPagesFetchLimit { get; private set; } + + public DisplayInfoConfigurationItem ShowPagesFetchLimitInstructions { get; private set; } + + public ConfigurationDataShazbat() + { + ShowPagesFetchLimit = new SingleSelectConfigurationItem( + "Show Pages Fetch Limit (sub-requests when searching)", + new Dictionary + { + {"1", "1"}, + {"2", "2"}, + {"3", "3"}, + {"4", "4"}, + {"5", "5"} + }) + { Value = "2" }; + + ShowPagesFetchLimitInstructions = new DisplayInfoConfigurationItem("Show Pages Fetch Limit Warning", "Higher values may risk your account being flagged for bot activity when used with automation software such as Sonarr."); + } + } +} diff --git a/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataSpeedCD.cs b/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataSpeedCD.cs index 533285d7b..3138742d2 100644 --- a/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataSpeedCD.cs +++ b/src/Jackett.Common/Models/IndexerConfig/Bespoke/ConfigurationDataSpeedCD.cs @@ -3,7 +3,7 @@ using System.Diagnostics.CodeAnalysis; namespace Jackett.Common.Models.IndexerConfig.Bespoke { [ExcludeFromCodeCoverage] - public class ConfigurationDataSpeedCD : ConfigurationDataBasicLogin + internal class ConfigurationDataSpeedCD : ConfigurationDataBasicLogin { public BoolConfigurationItem Freeleech { get; set; } public BoolConfigurationItem ExcludeArchives { get; set; }