dontorrent: parse year for movie releases

plus some refactoring to use ParseDocumentAsync
This commit is contained in:
Bogdan
2025-01-24 16:48:25 +02:00
parent 0164a9568d
commit abf1c81863

View File

@@ -8,6 +8,7 @@ using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using AngleSharp.Html.Parser; using AngleSharp.Html.Parser;
using Jackett.Common.Extensions;
using Jackett.Common.Helpers; using Jackett.Common.Helpers;
using Jackett.Common.Models; using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig; using Jackett.Common.Models.IndexerConfig;
@@ -169,7 +170,7 @@ namespace Jackett.Common.Indexers.Definitions
var result = await RequestWithCookiesAsync(downloadUrl); var result = await RequestWithCookiesAsync(downloadUrl);
if (result.Status != HttpStatusCode.OK) if (result.Status != HttpStatusCode.OK)
throw new ExceptionWithConfigData(result.ContentString, configData); throw new ExceptionWithConfigData(result.ContentString, configData);
using var dom = parser.ParseDocument(result.ContentString); using var dom = await parser.ParseDocumentAsync(result.ContentString);
//var info = dom.QuerySelectorAll("div.descargar > div.card > div.card-body").First(); //var info = dom.QuerySelectorAll("div.descargar > div.card > div.card-body").First();
//var title = info.QuerySelector("h2.descargarTitulo").TextContent; //var title = info.QuerySelector("h2.descargarTitulo").TextContent;
@@ -186,15 +187,20 @@ namespace Jackett.Common.Indexers.Definitions
private async Task<List<ReleaseInfo>> PerformQueryNewest(TorznabQuery query) private async Task<List<ReleaseInfo>> PerformQueryNewest(TorznabQuery query)
{ {
var releases = new List<ReleaseInfo>(); var releases = new List<ReleaseInfo>();
var url = SiteLink + NewTorrentsUrl; var url = SiteLink + NewTorrentsUrl;
var result = await RequestWithCookiesAsync(url); var result = await RequestWithCookiesAsync(url);
if (result.Status != HttpStatusCode.OK) if (result.Status != HttpStatusCode.OK)
{
throw new ExceptionWithConfigData(result.ContentString, configData); throw new ExceptionWithConfigData(result.ContentString, configData);
logger.Debug("\naaa"); }
try try
{ {
var searchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString); using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var rows = doc.QuerySelector("div.seccion#ultimos_torrents > div.card > div.card-body > div"); var rows = doc.QuerySelector("div.seccion#ultimos_torrents > div.card > div.card-body > div");
@@ -238,11 +244,11 @@ namespace Jackett.Common.Indexers.Definitions
if (row.TagName.Equals("BR")) if (row.TagName.Equals("BR"))
{ {
// we add parsed items to rowDetailsLink to avoid duplicates in newest torrents // we add parsed items to rowDetailsLink to avoid duplicates in the newest torrents list results
// list results
if (!parsedDetailsLink.Contains(rowDetailsLink) && rowTitle != null) if (!parsedDetailsLink.Contains(rowDetailsLink) && rowTitle != null)
{ {
var cat = GetCategory(rowTitle, rowDetailsLink); var cat = GetCategory(rowTitle, rowDetailsLink);
switch (cat) switch (cat)
{ {
case "pelicula": case "pelicula":
@@ -253,9 +259,8 @@ namespace Jackett.Common.Indexers.Definitions
await ParseRelease(releases, rowDetailsLink, rowTitle, cat, rowQuality, query, false); await ParseRelease(releases, rowDetailsLink, rowTitle, cat, rowQuality, query, false);
parsedDetailsLink.Add(rowDetailsLink); parsedDetailsLink.Add(rowDetailsLink);
break; break;
default:
break;
} }
// clean the current row // clean the current row
rowTitle = null; rowTitle = null;
rowDetailsLink = null; rowDetailsLink = null;
@@ -285,7 +290,7 @@ namespace Jackett.Common.Indexers.Definitions
try try
{ {
var searchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString); using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var rows = doc.QuerySelectorAll("div.seccion#buscador > div.card > div.card-body > p"); var rows = doc.QuerySelectorAll("div.seccion#buscador > div.card > div.card-body > p");
@@ -307,7 +312,6 @@ namespace Jackett.Common.Indexers.Definitions
case "pelicula": case "pelicula":
case "serie": case "serie":
quality = Regex.Replace(row.QuerySelector("p > span > span").TextContent, "([()])", ""); quality = Regex.Replace(row.QuerySelector("p > span > span").TextContent, "([()])", "");
break; break;
} }
@@ -320,8 +324,6 @@ namespace Jackett.Common.Indexers.Definitions
case "musica": case "musica":
await ParseRelease(releases, link, title, cat, quality, query, matchWords); await ParseRelease(releases, link, title, cat, quality, query, matchWords);
break; break;
default: //ignore different categories
break;
} }
} }
} }
@@ -364,8 +366,6 @@ namespace Jackett.Common.Indexers.Definitions
case "musica": case "musica":
await ParseMusicRelease(releases, link, query, title); await ParseMusicRelease(releases, link, query, title);
break; break;
default:
break;
} }
} }
@@ -376,7 +376,7 @@ namespace Jackett.Common.Indexers.Definitions
throw new ExceptionWithConfigData(result.ContentString, configData); throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString); using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var data = doc.QuerySelector("div.descargar > div.card > div.card-body"); var data = doc.QuerySelector("div.descargar > div.card > div.card-body");
@@ -405,7 +405,7 @@ namespace Jackett.Common.Indexers.Definitions
throw new ExceptionWithConfigData(result.ContentString, configData); throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString); using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var data = doc.QuerySelector("div.descargar > div.card > div.card-body"); var data = doc.QuerySelector("div.descargar > div.card > div.card-body");
@@ -455,10 +455,12 @@ namespace Jackett.Common.Indexers.Definitions
var result = await RequestWithCookiesAsync(link); var result = await RequestWithCookiesAsync(link);
if (result.Status != HttpStatusCode.OK) if (result.Status != HttpStatusCode.OK)
{
throw new ExceptionWithConfigData(result.ContentString, configData); throw new ExceptionWithConfigData(result.ContentString, configData);
}
var searchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString); using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
// parse tags in title, we need to put the year after the real title (before the tags) // parse tags in title, we need to put the year after the real title (before the tags)
// Harry Potter And The Deathly Hallows: Part 1 [subs. Integrados] // Harry Potter And The Deathly Hallows: Part 1 [subs. Integrados]
@@ -467,12 +469,20 @@ namespace Jackett.Common.Indexers.Definitions
foreach (Match m in queryMatches) foreach (Match m in queryMatches)
{ {
var tag = m.Groups[1].Value.Trim().ToUpper(); var tag = m.Groups[1].Value.Trim().ToUpper();
if (tag.Equals("4K")) // Fix 4K quality. Eg Harry Potter Y La Orden Del Fénix [4k] if (tag.Equals("4K")) // Fix 4K quality. Eg Harry Potter Y La Orden Del Fénix [4k]
{
quality = "(UHD 4K 2160p)"; quality = "(UHD 4K 2160p)";
}
else if (tag.Equals("FULLBLURAY")) // Fix 4K quality. Eg Harry Potter Y El Cáliz De Fuego (fullbluray) else if (tag.Equals("FULLBLURAY")) // Fix 4K quality. Eg Harry Potter Y El Cáliz De Fuego (fullbluray)
{
quality = "(COMPLETE BLURAY)"; quality = "(COMPLETE BLURAY)";
}
else // Add the tag to the title else // Add the tag to the title
{
tags += " " + tag; tags += " " + tag;
}
title = title.Replace(m.Groups[0].Value, ""); title = title.Replace(m.Groups[0].Value, "");
} }
title = title.Trim(); title = title.Trim();
@@ -487,8 +497,17 @@ namespace Jackett.Common.Indexers.Definitions
quality = Regex.Replace(quality, "HDRip", "BDRip", RegexOptions.IgnoreCase); // fix for Radarr quality = Regex.Replace(quality, "HDRip", "BDRip", RegexOptions.IgnoreCase); // fix for Radarr
} }
var releaseYear = doc.QuerySelector("div.d-inline-block.ml-2 > p:contains('Año') > a")?.TextContent.Trim();
// add the year // add the year
title = query.Year != null ? title + " " + query.Year : title; if (releaseYear.IsNotNullOrWhiteSpace() && Regex.IsMatch(releaseYear!, @"^((?:19|20)\d{2})$"))
{
title += $" {releaseYear}";
}
else if (query.Year is > 0)
{
title += $" {query.Year}";
}
// add the tags // add the tags
title += tags; title += tags;
@@ -498,7 +517,9 @@ namespace Jackett.Common.Indexers.Definitions
// add quality // add quality
if (quality != null) if (quality != null)
{
title += " " + quality; title += " " + quality;
}
var info = doc.QuerySelectorAll("div.descargar > div.card > div.card-body").First(); var info = doc.QuerySelectorAll("div.descargar > div.card > div.card-body").First();
var moreinfo = info.QuerySelectorAll("div.text-center > div.d-inline-block"); var moreinfo = info.QuerySelectorAll("div.text-center > div.d-inline-block");
@@ -506,17 +527,28 @@ namespace Jackett.Common.Indexers.Definitions
// guess size // guess size
long size; long size;
if (moreinfo.Length == 2) if (moreinfo.Length == 2)
{
size = ParseUtil.GetBytes(moreinfo[1].QuerySelector("p").TextContent); size = ParseUtil.GetBytes(moreinfo[1].QuerySelector("p").TextContent);
}
else if (title.ToLower().Contains("4k")) else if (title.ToLower().Contains("4k"))
{
size = 50.Gigabytes(); size = 50.Gigabytes();
}
else if (title.ToLower().Contains("1080p")) else if (title.ToLower().Contains("1080p"))
{
size = 4.Gigabytes(); size = 4.Gigabytes();
}
else if (title.ToLower().Contains("720p")) else if (title.ToLower().Contains("720p"))
{
size = 1.Gigabytes(); size = 1.Gigabytes();
}
else else
{
size = 512.Megabytes(); size = 512.Megabytes();
}
var release = GenerateRelease(title, link, link, GetCategory(title, link), DateTime.Now, size); var release = GenerateRelease(title, link, link, GetCategory(title, link), DateTime.Now, size);
releases.Add(release); releases.Add(release);
} }