From 3b4eceed879a4ccd9302c0a7bbf78b3cc16f85ff Mon Sep 17 00:00:00 2001 From: chibidev Date: Sat, 3 Jun 2017 15:04:51 +0200 Subject: [PATCH] Feature/improved aggregate results (#1432) * Line endings... * Add fallback query for meta indexers In cases where multiple indexers are configured under one metaindexer, if any of them supports IMDB search, the meta will support IMDB search as well. However, the actual query will then only be performed by those supporting IMDB search, because others refuse it (see CanHandleQuery implementation). - This adds support of a fallback mechanism for other indexers - Adds first implementation of result improvement (necessary for fallback queries as they might produce irrelevant results) - Some minor fixes encountered while debugging/coding Known issue: - Configuring nCore and IsoHunt together will render results from nCore unusable. Don't know why. --- src/Jackett/Controllers/TorznabController.cs | 5 +- src/Jackett/Indexers/BaseIndexer.cs | 2 + src/Jackett/Indexers/Meta/BaseMetaIndexer.cs | 84 +++++++++++++++++-- src/Jackett/Indexers/Meta/MetaIndexers.cs | 7 +- src/Jackett/Models/ReleaseInfo.cs | 7 ++ src/Jackett/Models/TorznabCapabilities.cs | 2 +- src/Jackett/Models/TorznabQuery.cs | 42 ++++++++++ src/Jackett/Services/IndexerManagerService.cs | 2 +- 8 files changed, 134 insertions(+), 17 deletions(-) diff --git a/src/Jackett/Controllers/TorznabController.cs b/src/Jackett/Controllers/TorznabController.cs index ef299138e..397e548d2 100644 --- a/src/Jackett/Controllers/TorznabController.cs +++ b/src/Jackett/Controllers/TorznabController.cs @@ -116,10 +116,9 @@ namespace Jackett.Controllers releases = indexer.CleanLinks(releases); // Some trackers do not keep their clocks up to date and can be ~20 minutes out! 
- foreach (var release in releases) + foreach (var release in releases.Where(r => r.PublishDate > DateTime.Now)) { - if (release.PublishDate > DateTime.Now) - release.PublishDate = DateTime.Now; + release.PublishDate = DateTime.Now; } // Some trackers do not support multiple category filtering so filter the releases that match manually. diff --git a/src/Jackett/Indexers/BaseIndexer.cs b/src/Jackett/Indexers/BaseIndexer.cs index 05ef60d42..4dd6d052a 100644 --- a/src/Jackett/Indexers/BaseIndexer.cs +++ b/src/Jackett/Indexers/BaseIndexer.cs @@ -644,6 +644,8 @@ namespace Jackett.Indexers public bool CanHandleQuery(TorznabQuery query) { + if (query == null) + return false; var caps = TorznabCaps; if (!caps.SearchAvailable && query.IsSearch) return false; diff --git a/src/Jackett/Indexers/Meta/BaseMetaIndexer.cs b/src/Jackett/Indexers/Meta/BaseMetaIndexer.cs index 8e80744e8..c13bcdf79 100644 --- a/src/Jackett/Indexers/Meta/BaseMetaIndexer.cs +++ b/src/Jackett/Indexers/Meta/BaseMetaIndexer.cs @@ -1,7 +1,9 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; +using System.Web; +using CsQuery; using Jackett.Models; using Jackett.Models.IndexerConfig; using Jackett.Services; @@ -11,10 +13,40 @@ using NLog; namespace Jackett.Indexers.Meta { + public class ImdbResolver { + public ImdbResolver(IWebClient webClient) { + WebClient = webClient; + } + + public async Task> GetAllTitles(string imdbId) { + if (!imdbId.StartsWith("tt")) + imdbId = "tt" + imdbId; + var request = new WebRequest("http://www.imdb.com/title/" + imdbId + "/releaseinfo"); + var result = await WebClient.GetString(request); + + CQ dom = result.Content; + + var mainTitle = dom["h3[itemprop=name]"].Find("a")[0].InnerHTML.Replace("\"", ""); + + var akas = dom["table#akas"].Find("tbody").Find("tr"); + var titleList = new List(); + titleList.Add(mainTitle); + foreach (var row in akas) { + string title = row.FirstElementChild.InnerHTML; + if (title == 
"(original title)" || title == "") + titleList.Add(HttpUtility.HtmlDecode(row.FirstElementChild.NextElementSibling.InnerHTML)); + } + + return titleList; + } + + private IWebClient WebClient; + } + public abstract class BaseMetaIndexer : BaseIndexer, IIndexer { - protected BaseMetaIndexer(string name, string description, IIndexerManagerService manager, Logger logger, ConfigurationData configData, IProtectionService p, Func filter) - : base(name, "http://127.0.0.1/", description, manager, null, logger, configData, p, null, null) + protected BaseMetaIndexer(string name, string description, IIndexerManagerService manager, IWebClient webClient, Logger logger, ConfigurationData configData, IProtectionService p, Func filter) + : base(name, "http://127.0.0.1/", description, manager, webClient, logger, configData, p, null, null) { filterFunc = filter; } @@ -26,16 +58,50 @@ namespace Jackett.Indexers.Meta public virtual async Task> PerformQuery(TorznabQuery query) { - var tasks = Indexers.Where(i => i.CanHandleQuery(query)).Select(i => i.PerformQuery(query)).ToList(); // explicit conversion to List to execute LINQ query - var aggregateTask = Task.WhenAll>(tasks); - await aggregateTask; - if (aggregateTask.Exception != null) - logger.Error(aggregateTask.Exception, "Error during request in metaindexer " + ID); + IEnumerable>> tasks = Indexers.Where(i => i.CanHandleQuery(query)).Select(i => i.PerformQuery(query)).ToList(); // explicit conversion to List to execute LINQ query - IEnumerable result = tasks.Where(x => x.Status == TaskStatus.RanToCompletion).SelectMany(x => x.Result).OrderByDescending(r => r.PublishDate); // Ordering by the number of seeders might be useful as well. 
+ bool needFallback = query.IsImdbQuery; + IEnumerable fallbackTitles = null; + if (needFallback) { + var imdb = new ImdbResolver(webclient); + fallbackTitles = await imdb.GetAllTitles(query.ImdbID); + var fallbackQueries = fallbackTitles.Select(t => query.CreateFallback(t)); + var backupTasks = fallbackQueries.SelectMany(q => Indexers.Where(i => !i.CanHandleQuery(query) && i.CanHandleQuery(q)).Select(i => i.PerformQuery(q.Clone()))); + tasks = tasks.Concat(backupTasks.ToList()); // explicit conversion to List to execute LINQ query + } + + var aggregateTask = Task.WhenAll>(tasks); + try { + await aggregateTask; + } catch { + logger.Error(aggregateTask.Exception, "Error during request in metaindexer " + ID); + } + + var unorderedResult = tasks.Where(x => x.Status == TaskStatus.RanToCompletion).SelectMany(x => x.Result); + var orderedResult = unorderedResult.Where(r => { + var normalizedTitles = fallbackTitles.Concat(fallbackTitles.Select(t => t.Replace(' ', '.').Replace(":", ""))).Select(t => t.ToLowerInvariant()); + foreach (var title in normalizedTitles) { + if (r.Title.ToLowerInvariant().Contains(title)) + return true; + } + return false; + }).OrderByDescending(r => r.Gain); + + var filteredResult = orderedResult.Where(r => { + if (r.Imdb != null) { + try { + return Int64.Parse(query.ImdbID.Select(c => char.IsDigit(c)).ToString()) == r.Imdb; + } catch { + // Cannot safely determine whether result is what we + // wanted, so let's just leave it alone... + } + } + return true; + }); // Limiting the response size might be interesting for use-cases where there are // tons of trackers configured in Jackett. For now just use the limit param if // someone wants to do that. 
+ IEnumerable result = filteredResult; if (query.Limit > 0) result = result.Take(query.Limit); return result; diff --git a/src/Jackett/Indexers/Meta/MetaIndexers.cs b/src/Jackett/Indexers/Meta/MetaIndexers.cs index 15a1391cf..deddd7737 100644 --- a/src/Jackett/Indexers/Meta/MetaIndexers.cs +++ b/src/Jackett/Indexers/Meta/MetaIndexers.cs @@ -8,13 +8,14 @@ using Newtonsoft.Json.Linq; using Jackett.Services; using Jackett.Utils.Clients; using NLog; +using Jackett.Models.IndexerConfig; namespace Jackett.Indexers.Meta { - class AggregateIndexer : BaseMetaIndexer, IIndexer + class AggregateIndexer : BaseMetaIndexer { - public AggregateIndexer(IIndexerManagerService i, Logger l, IProtectionService ps) - : base("AggregateSearch", "This feed includes all configured trackers", i, l, new Models.IndexerConfig.ConfigurationData(), ps, x => true) + public AggregateIndexer(IIndexerManagerService i, IWebClient wc, Logger l, IProtectionService ps) + : base("AggregateSearch", "This feed includes all configured trackers", i, wc, l, new ConfigurationData(), ps, x => true) { } } diff --git a/src/Jackett/Models/ReleaseInfo.cs b/src/Jackett/Models/ReleaseInfo.cs index e93671b30..9c2926109 100644 --- a/src/Jackett/Models/ReleaseInfo.cs +++ b/src/Jackett/Models/ReleaseInfo.cs @@ -36,6 +36,13 @@ namespace Jackett.Models public double? DownloadVolumeFactor { get; set; } public double? UploadVolumeFactor { get; set; } + public double? 
Gain { + get { + var sizeInGB = Size / 1024.0 / 1024.0 / 1024.0; + return Seeders * sizeInGB; + } + } + public object Clone() { return new ReleaseInfo() diff --git a/src/Jackett/Models/TorznabCapabilities.cs b/src/Jackett/Models/TorznabCapabilities.cs index 48499c808..a4875d297 100644 --- a/src/Jackett/Models/TorznabCapabilities.cs +++ b/src/Jackett/Models/TorznabCapabilities.cs @@ -117,7 +117,7 @@ namespace Jackett.Models return xdoc.Declaration.ToString() + Environment.NewLine + xdoc.ToString(); } - public static TorznabCapabilities Concat (TorznabCapabilities lhs, TorznabCapabilities rhs) + public static TorznabCapabilities Concat(TorznabCapabilities lhs, TorznabCapabilities rhs) { lhs.SearchAvailable = lhs.SearchAvailable || rhs.SearchAvailable; lhs.TVSearchAvailable = lhs.TVSearchAvailable || rhs.TVSearchAvailable; diff --git a/src/Jackett/Models/TorznabQuery.cs b/src/Jackett/Models/TorznabQuery.cs index 49122b3df..197c0a631 100644 --- a/src/Jackett/Models/TorznabQuery.cs +++ b/src/Jackett/Models/TorznabQuery.cs @@ -111,6 +111,48 @@ namespace Jackett.Models IsTest = false; } + public TorznabQuery CreateFallback(string search) { + var ret = Clone(); + if (Categories == null || Categories.Length == 0) { + ret.Categories = new int[]{ TorznabCatType.Movies.ID, + TorznabCatType.MoviesForeign.ID, + TorznabCatType.MoviesOther.ID, + TorznabCatType.MoviesSD.ID, + TorznabCatType.MoviesHD.ID, + TorznabCatType.Movies3D.ID, + TorznabCatType.MoviesBluRay.ID, + TorznabCatType.MoviesDVD.ID, + TorznabCatType.MoviesWEBDL.ID, + }; + } + ret.SearchTerm = search; + + return ret; + } + + public TorznabQuery Clone() { + var ret = new TorznabQuery(); + ret.QueryType = QueryType; + if (Categories != null && Categories.Length > 0) { + ret.Categories = new int [Categories.Length]; + Array.Copy (Categories, ret.Categories, Categories.Length); + } + ret.Extended = Extended; + ret.ApiKey = ApiKey; + ret.Limit = Limit; + ret.Offset = Offset; + ret.Season = Season; + ret.Episode = Episode; + 
ret.SearchTerm = SearchTerm; + ret.IsTest = IsTest; + if (QueryStringParts != null && QueryStringParts.Length > 0) { + ret.QueryStringParts = new string [QueryStringParts.Length]; + Array.Copy (QueryStringParts, ret.QueryStringParts, QueryStringParts.Length); + } + + return ret; + } + public string GetQueryString() { return (SanitizedSearchTerm + " " + GetEpisodeSearchString()).Trim(); diff --git a/src/Jackett/Services/IndexerManagerService.cs b/src/Jackett/Services/IndexerManagerService.cs index dea89597d..f5c3276b2 100644 --- a/src/Jackett/Services/IndexerManagerService.cs +++ b/src/Jackett/Services/IndexerManagerService.cs @@ -132,7 +132,7 @@ namespace Jackett.Services public void InitAggregateIndexer() { logger.Info("Adding aggregate indexer"); - AggregateIndexer aggregateIndexer = new AggregateIndexer(this, logger, container.Resolve()); + AggregateIndexer aggregateIndexer = new AggregateIndexer(this, container.Resolve(), logger, container.Resolve()); this.aggregateIndexer = aggregateIndexer; UpdateAggregateIndexer(); }