DivxTotal improvements. Resolves #11027 (#11078)

This commit is contained in:
XYZJR
2021-02-13 21:57:18 +01:00
committed by GitHub
parent f1e2fbf750
commit 0a2fea89c6

View File

@@ -9,6 +9,7 @@ using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using AngleSharp.Dom; using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser; using AngleSharp.Html.Parser;
using Jackett.Common.Models; using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig; using Jackett.Common.Models.IndexerConfig;
@@ -25,8 +26,9 @@ namespace Jackett.Common.Indexers
public class DivxTotal : BaseWebIndexer public class DivxTotal : BaseWebIndexer
{ {
private const string DownloadLink = "/download_tt.php"; private const string DownloadLink = "/download_tt.php";
private const int MaxResultsPerPage = 15; private const int MaxNrOfResults = 100;
private const int MaxSearchPageLimit = 3; private const int MaxPageLoads = 3;
private static class DivxTotalCategories private static class DivxTotalCategories
{ {
public static string Peliculas => "peliculas"; public static string Peliculas => "peliculas";
@@ -37,6 +39,7 @@ namespace Jackett.Common.Indexers
public static string Programas => "programas"; public static string Programas => "programas";
public static string Otros => "otros"; public static string Otros => "otros";
} }
private static class DivxTotalFizeSizes private static class DivxTotalFizeSizes
{ {
public static long Peliculas => 2147483648; // 2 GB public static long Peliculas => 2147483648; // 2 GB
@@ -104,47 +107,52 @@ namespace Jackett.Common.Indexers
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query) protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{ {
var newQuery = query.Clone();
var releases = new List<ReleaseInfo>(); var releases = new List<ReleaseInfo>();
var matchWords = ((BoolItem)configData.GetDynamic("MatchWords")).Value; var matchWords = ((BoolItem)configData.GetDynamic("MatchWords")).Value;
matchWords = query.SearchTerm != "" && matchWords; matchWords = newQuery.SearchTerm != "" && matchWords;
// we remove parts from the original query // we remove parts from the original query
query = ParseQuery(query); newQuery = ParseQuery(newQuery);
var qc = new NameValueCollection { { "s", query.SearchTerm } }; var qc = new NameValueCollection { { "s", newQuery.SearchTerm } };
var page = 1; var page = 1;
var isLastPage = false; IHtmlDocument htmlDocument = null;
do do
{ {
var url = SiteLink + "page/" + page + "/?" + qc.GetQueryString(); var url = SiteLink + "page/" + page + "/?" + qc.GetQueryString();
var result = await RequestWithCookiesAsync(url);
if (result.Status != HttpStatusCode.OK) string htmlString;
throw new ExceptionWithConfigData(result.ContentString, configData); try
{
htmlString = await LoadWebPageAsync(url);
}
catch
{
logger.Error($"DivxTotal: Failed to load url {url}");
return releases;
}
try try
{ {
var searchResultParser = new HtmlParser(); htmlDocument = ParseHtmlIntoDocument(htmlString);
var doc = searchResultParser.ParseDocument(result.ContentString);
var table = doc.QuerySelector("table.table"); var table = htmlDocument.QuerySelector("table.table");
if (table == null) if (table == null)
break; break;
var rows = table.QuerySelectorAll("tr");
isLastPage = rows.Length - 1 <= MaxResultsPerPage; // rows includes the header var rows = table.QuerySelectorAll("tbody > tr");
var isHeader = true;
foreach (var row in rows) foreach (var row in rows)
{ {
if (isHeader)
{
isHeader = false;
continue;
}
try try
{ {
await ParseRelease(releases, row, query, matchWords); var rels = await ParseReleasesAsync(row, newQuery, matchWords);
if (rels.Any())
{
releases.AddRange(rels);
}
} }
catch (Exception ex) catch (Exception ex)
{ {
@@ -154,16 +162,39 @@ namespace Jackett.Common.Indexers
} }
catch (Exception ex) catch (Exception ex)
{ {
OnParseError(result.ContentString, ex); OnParseError(htmlString, ex);
} }
page++; // update page number page++;
} while (!isLastPage && page <= MaxSearchPageLimit); } while (page <= MaxPageLoads &&
releases.Count < MaxNrOfResults &&
!IsLastPageOfQueryResult(htmlDocument));
return releases; return releases;
} }
/// <summary>
/// Requests <paramref name="url"/> through <c>RequestWithCookiesAsync</c> and hands back the response body.
/// </summary>
/// <param name="url">Address of the page to request.</param>
/// <returns>The response's <c>ContentString</c> when the status is <see cref="HttpStatusCode.OK"/>.</returns>
/// <exception cref="ExceptionWithConfigData">
/// Thrown for any status other than OK; built from the response content and <c>configData</c>.
/// </exception>
private async Task<string> LoadWebPageAsync(string url)
{
    var response = await RequestWithCookiesAsync(url);

    // Anything but 200 OK is reported to the caller as a failure.
    if (response.Status != HttpStatusCode.OK)
    {
        throw new ExceptionWithConfigData(response.ContentString, configData);
    }

    return response.ContentString;
}
/// <summary>
/// Parses an HTML string into a document using a newly created <c>HtmlParser</c>.
/// </summary>
/// <param name="htmlContentString">The HTML markup to parse.</param>
/// <returns>The document returned by <c>HtmlParser.ParseDocument</c>.</returns>
private IHtmlDocument ParseHtmlIntoDocument(string htmlContentString)
{
    var parser = new HtmlParser();
    return parser.ParseDocument(htmlContentString);
}
/// <summary>
/// Reports whether the given result page has no following page.
/// </summary>
/// <param name="htmlDocument">The parsed result page; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when <paramref name="htmlDocument"/> is <c>null</c> or when no anchor matches
/// the selector for the list item directly after the active pagination item; otherwise <c>false</c>.
/// </returns>
private bool IsLastPageOfQueryResult(IHtmlDocument htmlDocument)
    => htmlDocument == null
       || htmlDocument.QuerySelector("ul.pagination > li.active + li > a") == null;
public override async Task<byte[]> Download(Uri link) public override async Task<byte[]> Download(Uri link)
{ {
// for tv series we already have the link // for tv series we already have the link
@@ -171,85 +202,82 @@ namespace Jackett.Common.Indexers
// for other categories we have to do another step // for other categories we have to do another step
if (!downloadUrl.Contains(DownloadLink)) if (!downloadUrl.Contains(DownloadLink))
{ {
var result = await RequestWithCookiesAsync(downloadUrl); var htmlString = await LoadWebPageAsync(downloadUrl);
var htmlDocument = ParseHtmlIntoDocument(htmlString);
if (result.Status != HttpStatusCode.OK) downloadUrl = GetDownloadLink(htmlDocument);
throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser();
var doc = searchResultParser.ParseDocument(result.ContentString);
downloadUrl = GetDownloadLink(doc);
} }
var content = await base.Download(new Uri(downloadUrl)); var content = await base.Download(new Uri(downloadUrl));
return content; return content;
} }
private async Task ParseRelease(ICollection<ReleaseInfo> releases, IParentNode row, TorznabQuery query, private async Task<List<ReleaseInfo>> ParseReleasesAsync(IParentNode row, TorznabQuery query, bool matchWords)
bool matchWords)
{ {
var releases = new List<ReleaseInfo>();
var anchor = row.QuerySelector("a"); var anchor = row.QuerySelector("a");
var detailsStr = anchor.GetAttribute("href");
var title = anchor.TextContent.Trim(); var title = anchor.TextContent.Trim();
// match the words in the query with the titles
if (matchWords && !CheckTitleMatchWords(query.SearchTerm, title))
{
return releases;
}
var detailsStr = anchor.GetAttribute("href");
var cat = detailsStr.Split('/')[3]; var cat = detailsStr.Split('/')[3];
var categories = MapTrackerCatToNewznab(cat); var categories = MapTrackerCatToNewznab(cat);
// return results only for requested categories
if (query.Categories.Any() && !query.Categories.Contains(categories.First()))
{
return releases;
}
var publishStr = row.QuerySelectorAll("td")[2].TextContent.Trim(); var publishStr = row.QuerySelectorAll("td")[2].TextContent.Trim();
var publishDate = TryToParseDate(publishStr, DateTime.Now); var publishDate = TryToParseDate(publishStr, DateTime.Now);
var sizeStr = row.QuerySelectorAll("td")[3].TextContent.Trim(); var sizeStr = row.QuerySelectorAll("td")[3].TextContent.Trim();
// return results only for requested categories
if (query.Categories.Any() && !query.Categories.Contains(categories.First()))
return;
// match the words in the query with the titles
if (matchWords && !CheckTitleMatchWords(query.SearchTerm, title))
return;
// parsing is different for each category // parsing is different for each category
if (cat == DivxTotalCategories.Series) if (cat == DivxTotalCategories.Series)
await ParseSeriesRelease(releases, query, detailsStr, cat, publishDate); {
var seriesReleases = await ParseSeriesReleaseAsync(query, detailsStr, cat, publishDate);
releases.AddRange(seriesReleases);
}
else if (query.Episode == null) // if it's scene series, we don't return other categories else if (query.Episode == null) // if it's scene series, we don't return other categories
{ {
if (cat == DivxTotalCategories.Peliculas || cat == DivxTotalCategories.PeliculasHd || if (cat == DivxTotalCategories.Peliculas || cat == DivxTotalCategories.PeliculasHd ||
cat == DivxTotalCategories.Peliculas3D || cat == DivxTotalCategories.PeliculasDvdr) cat == DivxTotalCategories.Peliculas3D || cat == DivxTotalCategories.PeliculasDvdr)
ParseMovieRelease(releases, query, title, detailsStr, cat, publishDate, sizeStr); {
var movieRelease = ParseMovieRelease(query, title, detailsStr, cat, publishDate, sizeStr);
releases.Add(movieRelease);
}
else else
{ {
var size = TryToParseSize(sizeStr, DivxTotalFizeSizes.Otros); var size = TryToParseSize(sizeStr, DivxTotalFizeSizes.Otros);
GenerateRelease(releases, title, detailsStr, detailsStr, cat, publishDate, size); var release = GenerateRelease(title, detailsStr, detailsStr, cat, publishDate, size);
releases.Add(release);
} }
} }
return releases;
} }
private async Task ParseSeriesRelease(ICollection<ReleaseInfo> releases, TorznabQuery query, private async Task<List<ReleaseInfo>> ParseSeriesReleaseAsync(TorznabQuery query, string detailsStr, string cat, DateTime publishDate)
string detailsStr, string cat, DateTime publishDate)
{ {
var result = await RequestWithCookiesAsync(detailsStr); var seriesReleases = new List<ReleaseInfo>();
if (result.Status != HttpStatusCode.OK) var htmlString = await LoadWebPageAsync(detailsStr);
throw new ExceptionWithConfigData(result.ContentString, configData); var htmlDocument = ParseHtmlIntoDocument(htmlString);
var searchResultParser = new HtmlParser(); var tables = htmlDocument.QuerySelectorAll("table.table");
var doc = searchResultParser.ParseDocument(result.ContentString);
var tables = doc.QuerySelectorAll("table.table");
foreach (var table in tables) foreach (var table in tables)
{ {
var rows = table.QuerySelectorAll("tr"); var rows = table.QuerySelectorAll("tbody > tr");
var isHeader = true;
foreach (var row in rows) foreach (var row in rows)
{ {
if (isHeader)
{
isHeader = false;
continue;
}
var anchor = row.QuerySelector("a"); var anchor = row.QuerySelector("a");
var episodeTitle = anchor.TextContent.Trim(); var episodeTitle = anchor.TextContent.Trim();
var downloadLink = GetDownloadLink(row);
var episodePublishStr = row.QuerySelectorAll("td")[3].TextContent.Trim();
var episodePublish = TryToParseDate(episodePublishStr, publishDate);
// Convert the title to Scene format // Convert the title to Scene format
episodeTitle = ParseDivxTotalSeriesTitle(episodeTitle, query); episodeTitle = ParseDivxTotalSeriesTitle(episodeTitle, query);
@@ -258,14 +286,18 @@ namespace Jackett.Common.Indexers
if (query.Episode != null && !episodeTitle.Contains(query.GetEpisodeSearchString())) if (query.Episode != null && !episodeTitle.Contains(query.GetEpisodeSearchString()))
continue; continue;
GenerateRelease(releases, episodeTitle, detailsStr, downloadLink, cat, episodePublish, var downloadLink = GetDownloadLink(row);
DivxTotalFizeSizes.Series); var episodePublishStr = row.QuerySelectorAll("td")[3].TextContent.Trim();
var episodePublish = TryToParseDate(episodePublishStr, publishDate);
seriesReleases.Add(GenerateRelease(episodeTitle, detailsStr, downloadLink, cat, episodePublish, DivxTotalFizeSizes.Series));
} }
} }
return seriesReleases;
} }
private void ParseMovieRelease(ICollection<ReleaseInfo> releases, TorznabQuery query, string title, private ReleaseInfo ParseMovieRelease(TorznabQuery query, string title, string detailsStr, string cat, DateTime publishDate, string sizeStr)
string detailsStr, string cat, DateTime publishDate, string sizeStr)
{ {
// parse tags in title, we need to put the year after the real title (before the tags) // parse tags in title, we need to put the year after the real title (before the tags)
// La Maldicion ( HD-CAM) // La Maldicion ( HD-CAM)
@@ -301,11 +333,11 @@ namespace Jackett.Common.Indexers
else else
throw new Exception("Unknown category " + cat); throw new Exception("Unknown category " + cat);
GenerateRelease(releases, title, detailsStr, detailsStr, cat, publishDate, size); var movieRelease = GenerateRelease(title, detailsStr, detailsStr, cat, publishDate, size);
return movieRelease;
} }
private void GenerateRelease(ICollection<ReleaseInfo> releases, string title, string detailsStr, private ReleaseInfo GenerateRelease(string title, string detailsStr, string downloadLink, string cat, DateTime publishDate, long size)
string downloadLink, string cat, DateTime publishDate, long size)
{ {
var link = new Uri(downloadLink); var link = new Uri(downloadLink);
var details = new Uri(detailsStr); var details = new Uri(detailsStr);
@@ -324,7 +356,7 @@ namespace Jackett.Common.Indexers
DownloadVolumeFactor = 0, DownloadVolumeFactor = 0,
UploadVolumeFactor = 1 UploadVolumeFactor = 1
}; };
releases.Add(release); return release;
} }
private static string GetDownloadLink(IParentNode dom) => private static string GetDownloadLink(IParentNode dom) =>