From 2608a4cd44d1fff26998380681fd65e42cf5ae22 Mon Sep 17 00:00:00 2001 From: Cory Date: Sun, 29 Mar 2020 10:21:09 -0500 Subject: [PATCH] 720-pier: Update parsing. Resolves #7877 resolves #7190 (#7895) --- src/Jackett.Common/Indexers/720pier.cs | 214 ++++++++++--------------- 1 file changed, 84 insertions(+), 130 deletions(-) diff --git a/src/Jackett.Common/Indexers/720pier.cs b/src/Jackett.Common/Indexers/720pier.cs index 65cf49938..b75c4d869 100644 --- a/src/Jackett.Common/Indexers/720pier.cs +++ b/src/Jackett.Common/Indexers/720pier.cs @@ -1,8 +1,8 @@ using System; using System.Collections.Generic; using System.Collections.Specialized; -using System.Linq; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using AngleSharp.Html.Parser; using Jackett.Common.Models; @@ -17,34 +17,20 @@ namespace Jackett.Common.Indexers { public class Pier720 : BaseWebIndexer { - private string LoginUrl => SiteLink + "ucp.php?mode=login"; - private string SearchUrl => SiteLink + "search.php"; - - public override string[] LegacySiteLinks { get; protected set; } = { - "http://720pier.ru/", - }; - - private new ConfigurationDataBasicLoginWithRSSAndDisplay configData - { - get => (ConfigurationDataBasicLoginWithRSSAndDisplay)base.configData; - set => base.configData = value; - } - - public Pier720(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps) - : base(name: "720pier", - description: "720pier is a RUSSIAN Private Torrent Tracker for HD SPORTS", - link: "https://720pier.ru/", - caps: TorznabUtil.CreateDefaultTorznabTVCaps(), - configService: configService, - client: wc, - logger: l, - p: ps, - configData: new ConfigurationDataBasicLoginWithRSSAndDisplay()) + public Pier720(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps) : + base("720pier", + description: "720pier is a RUSSIAN Private Torrent Tracker for HD SPORTS", + link: "https://720pier.ru/", + caps: TorznabUtil.CreateDefaultTorznabTVCaps(), + configService: configService, + client: wc, + logger: l, + p: ps, + configData: new ConfigurationDataBasicLoginWithRSSAndDisplay()) { Encoding = Encoding.UTF8; Language = "ru-ru"; Type = "private"; - AddCategoryMapping(32, TorznabCatType.TVSport, "Basketball"); AddCategoryMapping(34, TorznabCatType.TVSport, "Basketball - NBA"); AddCategoryMapping(87, TorznabCatType.TVSport, "Basketball - NBA Playoffs"); @@ -58,7 +44,6 @@ namespace Jackett.Common.Indexers AddCategoryMapping(51, TorznabCatType.TVSport, "Basketball - Reviews and highlights"); AddCategoryMapping(41, TorznabCatType.TVSport, "Basketball - Other"); AddCategoryMapping(38, TorznabCatType.TVSport, "Basketball - Olympic Games"); - AddCategoryMapping(42, TorznabCatType.TVSport, "Football"); AddCategoryMapping(43, TorznabCatType.TVSport, "Football - NFL"); AddCategoryMapping(66, TorznabCatType.TVSport, "Football - Super Bowls"); @@ -68,7 +53,6 @@ namespace Jackett.Common.Indexers AddCategoryMapping(54, TorznabCatType.TVSport, "Football - Reviews and highlights"); AddCategoryMapping(97, TorznabCatType.TVSport, "Football - Documentaries"); AddCategoryMapping(44, TorznabCatType.TVSport, "Football - Other"); - AddCategoryMapping(46, TorznabCatType.TVSport, "Hockey"); AddCategoryMapping(48, TorznabCatType.TVSport, "Hockey - NHL"); AddCategoryMapping(88, TorznabCatType.TVSport, "Hockey - NHL Playoffs"); @@ -82,12 +66,10 @@ namespace Jackett.Common.Indexers AddCategoryMapping(68, TorznabCatType.TVSport, "Hockey - Documentaries"); AddCategoryMapping(64, TorznabCatType.TVSport, "Hockey - Reviews and highlights"); AddCategoryMapping(50, TorznabCatType.TVSport, "Hockey - Other"); - AddCategoryMapping(55, TorznabCatType.TVSport, "Baseball"); AddCategoryMapping(71, TorznabCatType.TVSport, "Baseball - MLB"); AddCategoryMapping(72, TorznabCatType.TVSport, "Baseball - Other"); AddCategoryMapping(85, TorznabCatType.TVSport, "Baseball - Reviews, highlights, documentaries"); - AddCategoryMapping(59, TorznabCatType.TVSport, "Soccer"); AddCategoryMapping(61, TorznabCatType.TVSport, "Soccer - English soccer"); AddCategoryMapping(86, TorznabCatType.TVSport, "Soccer - UEFA"); @@ -95,7 +77,6 @@ namespace Jackett.Common.Indexers AddCategoryMapping(62, TorznabCatType.TVSport, "Soccer - Other tournaments, championships"); AddCategoryMapping(63, TorznabCatType.TVSport, "Soccer - World Championships"); AddCategoryMapping(98, TorznabCatType.TVSport, "Soccer - FIFA World Cup"); - AddCategoryMapping(45, TorznabCatType.TVSport, "Other sports"); AddCategoryMapping(79, TorznabCatType.TVSport, "Other sports - Rugby"); AddCategoryMapping(78, TorznabCatType.TVSport, "Other sports - Lacrosse"); @@ -106,72 +87,60 @@ namespace Jackett.Common.Indexers AddCategoryMapping(73, TorznabCatType.TVSport, "Other sports - Auto, moto racing"); AddCategoryMapping(91, TorznabCatType.TVSport, "Other sports - Olympic Games"); AddCategoryMapping(94, TorznabCatType.TVSport, "Other sports - Misc"); - AddCategoryMapping(56, TorznabCatType.TVSport, "Sports on tv"); AddCategoryMapping(30, TorznabCatType.TVSport, "Sports"); } + private new ConfigurationDataBasicLoginWithRSSAndDisplay configData => (ConfigurationDataBasicLoginWithRSSAndDisplay)base.configData; + + public override string[] LegacySiteLinks { get; protected set; } = + { + "http://720pier.ru/" + }; + + private string LoginUrl => SiteLink + "ucp.php?mode=login"; + private string SearchUrl => SiteLink + "search.php"; + public override async Task ApplyConfiguration(JToken configJson) { LoadValuesFromJson(configJson); - var pairs = new Dictionary { - {"username", configData.Username.Value}, - {"password", configData.Password.Value}, - {"redirect", "/"}, - {"login", "Login"}, - {"autologin", "on"} + { "username", configData.Username.Value }, + { "password", configData.Password.Value }, + { "redirect", "/" }, + { "login", "Login" }, + { "autologin", "on" } }; var htmlParser = new HtmlParser(); var loginDocument = htmlParser.ParseDocument((await RequestStringWithCookies(LoginUrl)).Content); pairs["creation_time"] = loginDocument.GetElementsByName("creation_time")[0].GetAttribute("value"); pairs["form_token"] = loginDocument.GetElementsByName("form_token")[0].GetAttribute("value"); pairs["sid"] = loginDocument.GetElementsByName("sid")[0].GetAttribute("value"); - var result = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, LoginUrl, true); - await ConfigureIfOK(result.Cookies, result.Content?.Contains("ucp.php?mode=logout&") == true, () => - { - var errorMessage = result.Content; - throw new ExceptionWithConfigData(errorMessage, configData); - }); + await ConfigureIfOK( + result.Cookies, result.Content?.Contains("ucp.php?mode=logout&") == true, + () => throw new ExceptionWithConfigData(result.Content, configData)); return IndexerConfigurationStatus.RequiresTesting; } protected override async Task> PerformQuery(TorznabQuery query) { - var releases = new List(); var searchString = query.GetQueryString(); - - var queryCollection = new NameValueCollection - { - {"st", "0"}, - {"sd", "d"}, - {"sk", "t"}, - {"tracker_search", "torrent"}, - {"t", "0"}, - {"submit", "Search"}, - {"sr", "topics"}, - {"ot", "1" } - }; - - //queryCollection.Add("sr", "posts"); - //queryCollection.Add("ch", "99999"); - - // if the search string is empty use the getnew view - if (string.IsNullOrWhiteSpace(searchString)) - { - queryCollection.Add("search_id", "active_topics"); - } - else // use the normal search - { - searchString = searchString.Replace("-", " "); - queryCollection.Add("keywords", searchString); - queryCollection.Add("sf", "titleonly"); - queryCollection.Add("sr", "topics"); - queryCollection.Add("pt", "t"); - } - + var keywordSearch = !string.IsNullOrWhiteSpace(searchString); + var releases = new List(); + var queryCollection = !keywordSearch + ? new NameValueCollection + { + { "search_id", "active_topics" } + } + : new NameValueCollection + { + { "sr", "posts" }, //Search all posts + { "ot", "1" }, //Search only in forums trackers (checked) + { "keywords", searchString }, + { "sf", "titleonly" } + }; var searchUrl = SearchUrl + "?" + queryCollection.GetQueryString(); var results = await RequestStringWithCookies(searchUrl); if (!results.Content.Contains("ucp.php?mode=logout")) @@ -179,68 +148,53 @@ namespace Jackett.Common.Indexers await ApplyConfiguration(null); results = await RequestStringWithCookies(searchUrl); } + try { - const string rowsSelector = "ul.topics > li.row"; - var resultParser = new HtmlParser(); var searchResultDocument = resultParser.ParseDocument(results.Content); - var rows = searchResultDocument.QuerySelectorAll(rowsSelector); - foreach (var row in rows) + var rowSelector = keywordSearch + ? "div.search div.postbody > h3 > a" + : "ul.topics > li.row:has(i.fa-paperclip) a.topictitle"; // Torrent lines have paperclip icon. Chat topics don't + var rows = searchResultDocument.QuerySelectorAll(rowSelector); + foreach (var rowLink in rows) { - try + var detailLink = SiteLink + rowLink.GetAttribute("href"); + var detailsResult = await RequestStringWithCookies(detailLink); + var detailsDocument = resultParser.ParseDocument(detailsResult.Content); + var detailRow = detailsDocument.QuerySelector("table.table2 > tbody > tr"); + if (detailRow == null) + continue; //No torrents in result + var qDownloadLink = detailRow.QuerySelector("a[href^=\"/download/torrent\"]"); + var link = new Uri(SiteLink + qDownloadLink.GetAttribute("href").TrimStart('/')); + var timestr = detailRow.Children[0].QuerySelector("ul.dropdown-contents span.my_tt").TextContent; + var publishDate = DateTimeUtil.FromUnknown(timestr, "UK"); + var forumId = detailsDocument.QuerySelector("li.breadcrumbs").LastElementChild + .GetAttribute("data-forum-id"); + var sizeString = detailRow.Children[4].QuerySelector("span.my_tt").GetAttribute("title"); + var size = ParseUtil.CoerceLong(Regex.Replace(sizeString, @"[^0-9]", string.Empty)); + var comments = new Uri(detailLink); + var grabs = ParseUtil.CoerceInt(detailRow.Children[0].QuerySelector("span.complet").TextContent); + var seeders = ParseUtil.CoerceInt(detailRow.Children[2].QuerySelector("span.seed").TextContent); + var leechers = ParseUtil.CoerceInt(detailRow.Children[3].QuerySelector("span.leech").TextContent); + var release = new ReleaseInfo { - var seeders = ParseUtil.CoerceInt(row.QuerySelector("span.seed").TextContent); - var grabs = ParseUtil.CoerceLong(row.QuerySelector("span.complet").TextContent); - var qDetailsLink = row.QuerySelector("a.topictitle"); - var detailsResult = await RequestStringWithCookies(SiteLink + qDetailsLink.GetAttribute("href")); - var detailsResultDocument = resultParser.ParseDocument(detailsResult.Content); - var qDownloadLink = detailsResultDocument.QuerySelector("table.table2 > tbody > tr > td > a[href^=\"/download/torrent\"]"); - var author = row.QuerySelector("dd.lastpost > span"); - var timestr = author.TextContent.Split('\n') - .Where(str => !string.IsNullOrWhiteSpace(str)) //Filter blank lines - .Skip(1) //Skip author name - .FirstOrDefault() - .Trim(); - - var forum = row.QuerySelector("a[href^=\"./viewforum.php?f=\"]"); - var forumid = forum.GetAttribute("href").Split('=')[1]; - var sizeString = row.QuerySelector("dl.row-item > dt > div.list-inner > div[style^=\"float:right\"]") - .TextContent - .Replace("GiB", "GB") - .Replace("MiB", "MB") - .Replace("KiB", "KB") - .Replace("ГБ", "GB") - .Replace("МБ", "MB") - .Replace("КБ", "KB"); - var comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href")); - var leechers = ParseUtil.CoerceInt(row.QuerySelector("span.leech").TextContent); - var link = new Uri(SiteLink + qDownloadLink.GetAttribute("href").TrimStart('/')); - var publishDate = DateTimeUtil.FromUnknown(timestr, "UK"); - var size = ReleaseInfo.GetBytes(sizeString); - var release = new ReleaseInfo - { - MinimumRatio = 1, - MinimumSeedTime = 0, - DownloadVolumeFactor = 1, - UploadVolumeFactor = 1, - Seeders = seeders, - Grabs = grabs, - Peers = leechers + seeders, - Title = qDetailsLink.TextContent, - Comments = comments, - Guid = comments, - Link = link, - PublishDate = publishDate, - Category = MapTrackerCatToNewznab(forumid), - Size = size, - }; - releases.Add(release); - } - catch (Exception ex) - { - logger.Error($"{ID}: Error while parsing row '{row.OuterHtml}':\n\n{ex}"); - } + MinimumRatio = 1, + MinimumSeedTime = 0, + DownloadVolumeFactor = 1, + UploadVolumeFactor = 1, + Seeders = seeders, + Grabs = grabs, + Peers = leechers + seeders, + Title = rowLink.TextContent, + Comments = comments, + Guid = comments, + Link = link, + PublishDate = publishDate, + Category = MapTrackerCatToNewznab(forumId), + Size = size, + }; + releases.Add(release); } } catch (Exception ex)