dontorrent: parse year for movie releases

plus some refactoring to use ParseDocumentAsync
This commit is contained in:
Bogdan
2025-01-24 16:48:25 +02:00
parent 0164a9568d
commit abf1c81863

View File

@@ -8,6 +8,7 @@ using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp.Html.Parser;
using Jackett.Common.Extensions;
using Jackett.Common.Helpers;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig;
@@ -169,7 +170,7 @@ namespace Jackett.Common.Indexers.Definitions
var result = await RequestWithCookiesAsync(downloadUrl);
if (result.Status != HttpStatusCode.OK)
throw new ExceptionWithConfigData(result.ContentString, configData);
using var dom = parser.ParseDocument(result.ContentString);
using var dom = await parser.ParseDocumentAsync(result.ContentString);
//var info = dom.QuerySelectorAll("div.descargar > div.card > div.card-body").First();
//var title = info.QuerySelector("h2.descargarTitulo").TextContent;
@@ -186,15 +187,20 @@ namespace Jackett.Common.Indexers.Definitions
private async Task<List<ReleaseInfo>> PerformQueryNewest(TorznabQuery query)
{
var releases = new List<ReleaseInfo>();
var url = SiteLink + NewTorrentsUrl;
var result = await RequestWithCookiesAsync(url);
if (result.Status != HttpStatusCode.OK)
{
throw new ExceptionWithConfigData(result.ContentString, configData);
logger.Debug("\naaa");
}
try
{
var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString);
using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var rows = doc.QuerySelector("div.seccion#ultimos_torrents > div.card > div.card-body > div");
@@ -238,11 +244,11 @@ namespace Jackett.Common.Indexers.Definitions
if (row.TagName.Equals("BR"))
{
// we add parsed items to rowDetailsLink to avoid duplicates in newest torrents
// list results
// we add parsed items to rowDetailsLink to avoid duplicates in the newest torrents list results
if (!parsedDetailsLink.Contains(rowDetailsLink) && rowTitle != null)
{
var cat = GetCategory(rowTitle, rowDetailsLink);
switch (cat)
{
case "pelicula":
@@ -253,9 +259,8 @@ namespace Jackett.Common.Indexers.Definitions
await ParseRelease(releases, rowDetailsLink, rowTitle, cat, rowQuality, query, false);
parsedDetailsLink.Add(rowDetailsLink);
break;
default:
break;
}
// clean the current row
rowTitle = null;
rowDetailsLink = null;
@@ -285,7 +290,7 @@ namespace Jackett.Common.Indexers.Definitions
try
{
var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString);
using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var rows = doc.QuerySelectorAll("div.seccion#buscador > div.card > div.card-body > p");
@@ -307,7 +312,6 @@ namespace Jackett.Common.Indexers.Definitions
case "pelicula":
case "serie":
quality = Regex.Replace(row.QuerySelector("p > span > span").TextContent, "([()])", "");
break;
}
@@ -320,8 +324,6 @@ namespace Jackett.Common.Indexers.Definitions
case "musica":
await ParseRelease(releases, link, title, cat, quality, query, matchWords);
break;
default: //ignore different categories
break;
}
}
}
@@ -364,8 +366,6 @@ namespace Jackett.Common.Indexers.Definitions
case "musica":
await ParseMusicRelease(releases, link, query, title);
break;
default:
break;
}
}
@@ -376,7 +376,7 @@ namespace Jackett.Common.Indexers.Definitions
throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString);
using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var data = doc.QuerySelector("div.descargar > div.card > div.card-body");
@@ -405,7 +405,7 @@ namespace Jackett.Common.Indexers.Definitions
throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString);
using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
var data = doc.QuerySelector("div.descargar > div.card > div.card-body");
@@ -455,10 +455,12 @@ namespace Jackett.Common.Indexers.Definitions
var result = await RequestWithCookiesAsync(link);
if (result.Status != HttpStatusCode.OK)
{
throw new ExceptionWithConfigData(result.ContentString, configData);
}
var searchResultParser = new HtmlParser();
using var doc = searchResultParser.ParseDocument(result.ContentString);
using var doc = await searchResultParser.ParseDocumentAsync(result.ContentString);
// parse tags in title, we need to put the year after the real title (before the tags)
// Harry Potter And The Deathly Hallows: Part 1 [subs. Integrados]
@@ -467,12 +469,20 @@ namespace Jackett.Common.Indexers.Definitions
foreach (Match m in queryMatches)
{
var tag = m.Groups[1].Value.Trim().ToUpper();
if (tag.Equals("4K")) // Fix 4K quality. Eg Harry Potter Y La Orden Del Fénix [4k]
{
quality = "(UHD 4K 2160p)";
}
else if (tag.Equals("FULLBLURAY")) // Fix 4K quality. Eg Harry Potter Y El Cáliz De Fuego (fullbluray)
{
quality = "(COMPLETE BLURAY)";
}
else // Add the tag to the title
{
tags += " " + tag;
}
title = title.Replace(m.Groups[0].Value, "");
}
title = title.Trim();
@@ -487,8 +497,17 @@ namespace Jackett.Common.Indexers.Definitions
quality = Regex.Replace(quality, "HDRip", "BDRip", RegexOptions.IgnoreCase); // fix for Radarr
}
var releaseYear = doc.QuerySelector("div.d-inline-block.ml-2 > p:contains('Año') > a")?.TextContent.Trim();
// add the year
title = query.Year != null ? title + " " + query.Year : title;
if (releaseYear.IsNotNullOrWhiteSpace() && Regex.IsMatch(releaseYear!, @"^((?:19|20)\d{2})$"))
{
title += $" {releaseYear}";
}
else if (query.Year is > 0)
{
title += $" {query.Year}";
}
// add the tags
title += tags;
@@ -498,7 +517,9 @@ namespace Jackett.Common.Indexers.Definitions
// add quality
if (quality != null)
{
title += " " + quality;
}
var info = doc.QuerySelectorAll("div.descargar > div.card > div.card-body").First();
var moreinfo = info.QuerySelectorAll("div.text-center > div.d-inline-block");
@@ -506,17 +527,28 @@ namespace Jackett.Common.Indexers.Definitions
// guess size
long size;
if (moreinfo.Length == 2)
{
size = ParseUtil.GetBytes(moreinfo[1].QuerySelector("p").TextContent);
}
else if (title.ToLower().Contains("4k"))
{
size = 50.Gigabytes();
}
else if (title.ToLower().Contains("1080p"))
{
size = 4.Gigabytes();
}
else if (title.ToLower().Contains("720p"))
{
size = 1.Gigabytes();
}
else
{
size = 512.Megabytes();
}
var release = GenerateRelease(title, link, link, GetCategory(title, link), DateTime.Now, size);
releases.Add(release);
}