From fc79f23d323f4fb2d9dcb15393494eccd0fe47fd Mon Sep 17 00:00:00 2001 From: Bogdan Date: Wed, 15 Feb 2023 07:29:13 +0200 Subject: [PATCH] audiobookbay: migrate to C#. resolves #8687 (#14015) --- .../Definitions/audiobookbay.yml | 198 ------------ src/Jackett.Common/Indexers/AudioBookBay.cs | 296 ++++++++++++++++++ src/Jackett.Updater/Program.cs | 1 + 3 files changed, 297 insertions(+), 198 deletions(-) delete mode 100644 src/Jackett.Common/Definitions/audiobookbay.yml create mode 100644 src/Jackett.Common/Indexers/AudioBookBay.cs diff --git a/src/Jackett.Common/Definitions/audiobookbay.yml b/src/Jackett.Common/Definitions/audiobookbay.yml deleted file mode 100644 index 980f27b4e..000000000 --- a/src/Jackett.Common/Definitions/audiobookbay.yml +++ /dev/null @@ -1,198 +0,0 @@ ---- -id: audiobookbay -name: AudioBookBay -description: "AudioBook Bay (ABB) is a public Torrent Tracker for AUDIOBOOKS" -language: en-US -type: public -encoding: UTF-8 -requestDelay: 2 -links: - - https://audiobookbay.li/ - - https://audiobookbay.se/ -legacylinks: - - https://audiobookbay.la/ - - http://audiobookbay.net/ - - https://audiobookbay.unblockit.tv/ - - http://audiobookbay.nl/ - - http://audiobookbay.ws/ - - https://audiobookbay.unblockit.how/ - - https://audiobookbay.unblockit.cam/ - - https://audiobookbay.unblockit.biz/ - - https://audiobookbay.unblockit.day/ - - https://audiobookbay.unblockit.llc/ - - https://audiobookbay.unblockit.blue/ - - https://audiobookbay.unblockit.name/ - - http://audiobookbay.fi/ - - http://audiobookbay.se/ - - http://audiobookbayabb.com/ - - https://audiobookbay.unblockit.ist/ - - https://audiobookbay.unblockit.bet/ - - https://audiobookbay.unblockit.cat/ - - https://audiobookbay.unblockit.nz/ - - https://audiobookbay.fi/ - - https://audiobookbay.unblockit.page/ - - https://audiobookbay.unblockit.pet/ - - https://audiobookbay.unblockit.ink/ - - https://audiobookbay.unblockit.bio/ # error 502 - -caps: - categorymappings: - - {id: "(Post)apocalyptic", 
cat: Audio/Audiobook, desc: "(Post)apocalyptic"} - - {id: Action, cat: Audio/Audiobook, desc: "Action"} - - {id: Adults, cat: Audio/Audiobook, desc: "Adults"} - - {id: Adventure, cat: Audio/Audiobook, desc: "Adventure"} - - {id: Anthology, cat: Audio/Audiobook, desc: "Anthology"} - - {id: Art, cat: Audio/Audiobook, desc: "Art"} - - {id: Autobiography, cat: Audio/Audiobook, desc: "Autobiography & Biographies"} - - {id: Bestsellers, cat: Audio/Audiobook, desc: "Bestsellers"} - - {id: Business, cat: Audio/Audiobook, desc: "Business"} - - {id: Children, cat: Audio/Audiobook, desc: "Children"} - - {id: Classic, cat: Audio/Audiobook, desc: "Classic"} - - {id: Computer, cat: Audio/Audiobook, desc: "Computer"} - - {id: Contemporary, cat: Audio/Audiobook, desc: "Contemporary"} - - {id: Crime, cat: Audio/Audiobook, desc: "Crime"} - - {id: Detective, cat: Audio/Audiobook, desc: "Detective"} - - {id: Doctor, cat: Audio/Audiobook, desc: "Doctor Who"} - - {id: Documentary, cat: Audio/Audiobook, desc: "Documentary"} - - {id: Education, cat: Audio/Audiobook, desc: "Education"} - - {id: Fantasy, cat: Audio/Audiobook, desc: "Fantasy"} - - {id: Full, cat: Audio/Audiobook, desc: "Full Cast"} - - {id: Gay, cat: Audio/Audiobook, desc: "Gay"} - - {id: General, cat: Audio/Audiobook, desc: "General Fiction"} - - {id: Historical, cat: Audio/Audiobook, desc: "Historical Fiction"} - - {id: History, cat: Audio/Audiobook, desc: "History"} - - {id: Horror, cat: Audio/Audiobook, desc: "Horror"} - - {id: Humor, cat: Audio/Audiobook, desc: "Humor"} - - {id: Lecture, cat: Audio/Audiobook, desc: "Lecture"} - - {id: Lesbian, cat: Audio/Audiobook, desc: "Lesbian"} - - {id: LGBT, cat: Audio/Audiobook, desc: "LGBT"} - - {id: Libertarian, cat: Audio/Audiobook, desc: "Libertarian"} - - {id: Literature, cat: Audio/Audiobook, desc: "Literature"} - - {id: LitRPG, cat: Audio/Audiobook, desc: "LitRPG"} - - {id: Military, cat: Audio/Audiobook, desc: "Military"} - - {id: Misc., cat: Audio/Audiobook, desc: "Misc. 
Non-fiction"} - - {id: Mystery, cat: Audio/Audiobook, desc: "Mystery"} - - {id: Novel, cat: Audio/Audiobook, desc: "Novel"} - - {id: Other, cat: Audio/Audiobook, desc: "Other"} - - {id: Paranormal, cat: Audio/Audiobook, desc: "Paranormal"} - - {id: Plays, cat: Audio/Audiobook, desc: "Plays & Theater"} - - {id: Poetry, cat: Audio/Audiobook, desc: "Poetry"} - - {id: Political, cat: Audio/Audiobook, desc: "Political"} - - {id: Radio, cat: Audio/Audiobook, desc: "Radio Productions"} - - {id: Romance, cat: Audio/Audiobook, desc: "Romance"} - - {id: Sci-Fi, cat: Audio/Audiobook, desc: "Sci-Fi"} - - {id: Science, cat: Audio/Audiobook, desc: "Science"} - - {id: Self-help, cat: Audio/Audiobook, desc: "Self-help"} - - {id: Sex, cat: Audio/Audiobook, desc: "Sex Scenes"} - - {id: Short, cat: Audio/Audiobook, desc: "Short Story"} - - {id: Spiritual, cat: Audio/Audiobook, desc: "Spiritual & Religious"} - - {id: Sport, cat: Audio/Audiobook, desc: "Sport & Recreation"} - - {id: Suspense, cat: Audio/Audiobook, desc: "Suspense"} - - {id: Teen, cat: Audio/Audiobook, desc: "Teen & Young Adult"} - - {id: Thriller, cat: Audio/Audiobook, desc: "Thriller"} - - {id: "True", cat: Audio/Audiobook, desc: "True Crime"} - - {id: Tutorial, cat: Audio/Audiobook, desc: "Tutorial"} - - {id: Violence, cat: Audio/Audiobook, desc: "Violence"} - - {id: Westerns, cat: Audio/Audiobook, desc: "Westerns"} - - {id: Zombies, cat: Audio/Audiobook, desc: "Zombies"} - - modes: - search: [q] - book-search: [q] - -settings: [] - -download: - infohash: - hash: - selector: td:contains("Info Hash:") ~ td - filters: - - name: regexp - args: ([A-F|a-f|0-9]{40}) - title: - selector: h1 - filters: - - name: trim - - name: validfilename - -search: - paths: - # with just 7 results per page, try to grab up to 35 results - # http://audiobookbay.nl/?s=teeth - # http://audiobookbay.nl/page/2/?s=teeth - - path: "{{ if .Keywords }}?s={{ .Keywords }}&tt=1{{ else }}{{ end }}" - - path: "page/2/{{ if .Keywords }}?s={{ .Keywords 
}}&tt=1{{ else }}{{ end }}" - - path: "page/3/{{ if .Keywords }}?s={{ .Keywords }}&tt=1{{ else }}{{ end }}" - - path: "page/4/{{ if .Keywords }}?s={{ .Keywords }}&tt=1{{ else }}{{ end }}" - - path: "page/5/{{ if .Keywords }}?s={{ .Keywords }}&tt=1{{ else }}{{ end }}" - - rows: - selector: div.post:has(div[class="postTitle"]) - filters: - - name: andmatch - - fields: - category: - selector: div.postInfo - filters: - - name: regexp - args: "Category: (.+?)\\s" - _format: - optional: true - selector: div.postContent - filters: - - name: regexp - args: "Format: (.+?) /" - - name: replace - args: ["?", ""] - _bitrate: - optional: true - selector: div.postContent - filters: - - name: regexp - args: "Bitrate: (.+?)File" - - name: replace - args: ["?", ""] - title: - selector: div.postTitle - filters: - - name: append - args: "{{ if .Result._format }} [{{ .Result._format }}]{{ else }}{{ end }}{{ if .Result._bitrate }} [{{ .Result._bitrate }}]{{ else }}{{ end }}" - details: - selector: div.postTitle h2 a - attribute: href - download: - selector: div.postTitle h2 a - attribute: href - poster: - selector: img - attribute: src - date_optional: - optional: true - selector: div.postContent - filters: - - name: regexp - args: "(\\d{1,2} \\D{3} \\d{4})" - - name: dateparse - args: "2 Jan 2006" - date: - text: "{{ if .Result.date_optional }}{{ .Result.date_optional }}{{ else }}now{{ end }}" - size_optional: - optional: true - selector: div.postContent - filters: - - name: regexp - args: "File Size: (.+?)$" - - name: replace - args: ["s", ""] - size: - text: "{{ if .Result.size_optional }}{{ .Result.size_optional }}{{ else }}0 B{{ end }}" - seeders: - text: 1 - leechers: - text: 1 - downloadvolumefactor: - text: 0 - uploadvolumefactor: - text: 1 -# engine n/a diff --git a/src/Jackett.Common/Indexers/AudioBookBay.cs b/src/Jackett.Common/Indexers/AudioBookBay.cs new file mode 100644 index 000000000..0749f61e6 --- /dev/null +++ b/src/Jackett.Common/Indexers/AudioBookBay.cs @@ -0,0 
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig;
using Jackett.Common.Services.Interfaces;
using Jackett.Common.Utils;
using Jackett.Common.Utils.Clients;
using Newtonsoft.Json.Linq;
using NLog;

namespace Jackett.Common.Indexers
{
    /// <summary>
    /// Indexer for AudioBook Bay (ABB), a public audiobook tracker.
    /// Search results are scraped from the site's HTML listing pages and downloads are
    /// served as magnet links built from the info hash shown on each details page.
    /// </summary>
    /// <remarks>
    /// This class replaces the former <c>Definitions/audiobookbay.yml</c> definition
    /// ("audiobookbay: migrate to C#", resolves #8687). The same change adds
    /// <c>"Definitions/audiobookbay.yml", // migrated to C#</c> to the definitions list in
    /// <c>src/Jackett.Updater/Program.cs</c>.
    /// </remarks>
    [ExcludeFromCodeCoverage]
    public class AudioBookBay : BaseWebIndexer
    {
        // A listing page with fewer results than this is treated as the last one,
        // so no further pages are requested.
        private const int NextPageThreshold = 15;

        // Collapses every run of non-word characters in the search term into a single space.
        private static readonly Regex NonWordRegex = new Regex(@"[\W]+", RegexOptions.Compiled);

        // A 40 hex digit info hash (same requirement the removed YAML definition enforced).
        private static readonly Regex InfoHashRegex = new Regex(@"\b[0-9A-Fa-f]{40}\b", RegexOptions.Compiled);

        // The captures below are lazy so that they stop at the first delimiter instead of
        // swallowing the fields that follow on the same line of text.
        private static readonly Regex FormatRegex = new Regex(@"Format: (.+?) \/", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        private static readonly Regex BitrateRegex = new Regex(@"Bitrate: (.+?)File", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        private static readonly Regex SizeRegex = new Regex(@"File Size: (.+?)s?$", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        private static readonly Regex PostedRegex = new Regex(@"Posted: (\d{1,2} \D{3} \d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        private static readonly Regex CategoryRegex = new Regex(@"Category: (.+)$", RegexOptions.IgnoreCase | RegexOptions.Compiled);

        public override string[] AlternativeSiteLinks { get; protected set; } = {
            "https://audiobookbay.li/",
            "https://audiobookbay.se/"
        };

        public override string[] LegacySiteLinks { get; protected set; } =
        {
            "https://audiobookbay.la/",
            "http://audiobookbay.net/",
            "https://audiobookbay.unblockit.tv/",
            "http://audiobookbay.nl/",
            "http://audiobookbay.ws/",
            "https://audiobookbay.unblockit.how/",
            "https://audiobookbay.unblockit.cam/",
            "https://audiobookbay.unblockit.biz/",
            "https://audiobookbay.unblockit.day/",
            "https://audiobookbay.unblockit.llc/",
            "https://audiobookbay.unblockit.blue/",
            "https://audiobookbay.unblockit.name/",
            "http://audiobookbay.fi/",
            "http://audiobookbay.se/",
            "http://audiobookbayabb.com/",
            "https://audiobookbay.unblockit.ist/",
            "https://audiobookbay.unblockit.bet/",
            "https://audiobookbay.unblockit.cat/",
            "https://audiobookbay.unblockit.nz/",
            "https://audiobookbay.fi/",
            "https://audiobookbay.unblockit.page/",
            "https://audiobookbay.unblockit.pet/",
            "https://audiobookbay.unblockit.ink/",
            "https://audiobookbay.unblockit.bio/" // error 502
        };

        /// <summary>
        /// Creates the indexer, declares its search capabilities and registers the
        /// tracker categories (all of them map to <c>TorznabCatType.AudioAudiobook</c>).
        /// </summary>
        public AudioBookBay(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps, ICacheService cs)
            : base(id: "audiobookbay",
                   name: "AudioBook Bay",
                   description: "AudioBook Bay (ABB) is a public Torrent Tracker for AUDIOBOOKS",
                   link: "https://audiobookbay.li/",
                   caps: new TorznabCapabilities
                   {
                       BookSearchParams = new List<BookSearchParam>
                       {
                           BookSearchParam.Q
                       }
                   },
                   configService: configService,
                   client: wc,
                   logger: l,
                   p: ps,
                   cacheService: cs,
                   configData: new ConfigurationData())
        {
            Encoding = Encoding.UTF8;
            Language = "en-US";
            Type = "public";

            // requestDelay for API Limit (1 request per 2 seconds)
            webclient.requestDelay = 2.1;

            // Age
            AddCategoryMapping("children", TorznabCatType.AudioAudiobook, "Children");
            AddCategoryMapping("teen-young-adult", TorznabCatType.AudioAudiobook, "Teen & Young Adult");
            AddCategoryMapping("adults", TorznabCatType.AudioAudiobook, "Adults");

            // Category
            AddCategoryMapping("postapocalyptic", TorznabCatType.AudioAudiobook, "(Post)apocalyptic");
            AddCategoryMapping("action", TorznabCatType.AudioAudiobook, "Action");
            AddCategoryMapping("adventure", TorznabCatType.AudioAudiobook, "Adventure");
            AddCategoryMapping("art", TorznabCatType.AudioAudiobook, "Art");
            AddCategoryMapping("autobiography-biographies", TorznabCatType.AudioAudiobook, "Autobiography & Biographies");
            AddCategoryMapping("business", TorznabCatType.AudioAudiobook, "Business");
            AddCategoryMapping("computer", TorznabCatType.AudioAudiobook, "Computer");
            AddCategoryMapping("contemporary", TorznabCatType.AudioAudiobook, "Contemporary");
            AddCategoryMapping("crime", TorznabCatType.AudioAudiobook, "Crime");
            AddCategoryMapping("detective", TorznabCatType.AudioAudiobook, "Detective");
            AddCategoryMapping("doctor-who-sci-fi", TorznabCatType.AudioAudiobook, "Doctor Who");
            AddCategoryMapping("education", TorznabCatType.AudioAudiobook, "Education");
            AddCategoryMapping("fantasy", TorznabCatType.AudioAudiobook, "Fantasy");
            AddCategoryMapping("general-fiction", TorznabCatType.AudioAudiobook, "General Fiction");
            AddCategoryMapping("historical-fiction", TorznabCatType.AudioAudiobook, "Historical Fiction");
            AddCategoryMapping("history", TorznabCatType.AudioAudiobook, "History");
            AddCategoryMapping("horror", TorznabCatType.AudioAudiobook, "Horror");
            AddCategoryMapping("humor", TorznabCatType.AudioAudiobook, "Humor");
            AddCategoryMapping("lecture", TorznabCatType.AudioAudiobook, "Lecture");
            AddCategoryMapping("lgbt", TorznabCatType.AudioAudiobook, "LGBT");
            AddCategoryMapping("literature", TorznabCatType.AudioAudiobook, "Literature");
            AddCategoryMapping("litrpg", TorznabCatType.AudioAudiobook, "LitRPG");
            AddCategoryMapping("general-non-fiction", TorznabCatType.AudioAudiobook, "Misc. Non-fiction");
            AddCategoryMapping("mystery", TorznabCatType.AudioAudiobook, "Mystery");
            AddCategoryMapping("paranormal", TorznabCatType.AudioAudiobook, "Paranormal");
            AddCategoryMapping("plays-theater", TorznabCatType.AudioAudiobook, "Plays & Theater");
            AddCategoryMapping("poetry", TorznabCatType.AudioAudiobook, "Poetry");
            AddCategoryMapping("political", TorznabCatType.AudioAudiobook, "Political");
            AddCategoryMapping("radio-productions", TorznabCatType.AudioAudiobook, "Radio Productions");
            AddCategoryMapping("romance", TorznabCatType.AudioAudiobook, "Romance");
            AddCategoryMapping("sci-fi", TorznabCatType.AudioAudiobook, "Sci-Fi");
            AddCategoryMapping("science", TorznabCatType.AudioAudiobook, "Science");
            AddCategoryMapping("self-help", TorznabCatType.AudioAudiobook, "Self-help");
            AddCategoryMapping("spiritual", TorznabCatType.AudioAudiobook, "Spiritual & Religious");
            AddCategoryMapping("sports", TorznabCatType.AudioAudiobook, "Sport & Recreation");
            AddCategoryMapping("suspense", TorznabCatType.AudioAudiobook, "Suspense");
            AddCategoryMapping("thriller", TorznabCatType.AudioAudiobook, "Thriller");
            AddCategoryMapping("true-crime", TorznabCatType.AudioAudiobook, "True Crime");
            AddCategoryMapping("tutorial", TorznabCatType.AudioAudiobook, "Tutorial");
            AddCategoryMapping("westerns", TorznabCatType.AudioAudiobook, "Westerns");
            AddCategoryMapping("zombies", TorznabCatType.AudioAudiobook, "Zombies");

            // Category Modifiers
            AddCategoryMapping("anthology", TorznabCatType.AudioAudiobook, "Anthology");
            AddCategoryMapping("bestsellers", TorznabCatType.AudioAudiobook, "Bestsellers");
            AddCategoryMapping("classic", TorznabCatType.AudioAudiobook, "Classic");
            AddCategoryMapping("documentary", TorznabCatType.AudioAudiobook, "Documentary");
            AddCategoryMapping("full-cast", TorznabCatType.AudioAudiobook, "Full Cast");
            AddCategoryMapping("libertarian", TorznabCatType.AudioAudiobook, "Libertarian");
            AddCategoryMapping("military", TorznabCatType.AudioAudiobook, "Military");
            AddCategoryMapping("novel", TorznabCatType.AudioAudiobook, "Novel");
            AddCategoryMapping("short-story", TorznabCatType.AudioAudiobook, "Short Story");
        }

        /// <summary>
        /// Loads the supplied configuration and verifies it by running an empty query.
        /// </summary>
        /// <param name="configJson">Configuration values to load.</param>
        /// <returns><c>IndexerConfigurationStatus.Completed</c> once the check has run.</returns>
        /// <exception cref="Exception">Raised through the error callback when the test query returns no releases.</exception>
        public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
        {
            LoadValuesFromJson(configJson);

            var releases = await PerformQuery(new TorznabQuery());

            await ConfigureIfOK(string.Empty, releases.Any(), () => throw new Exception("Could not find releases from this URL"));

            return IndexerConfigurationStatus.Completed;
        }

        /// <summary>
        /// Fetches up to three listing pages for the query and returns the parsed releases.
        /// Paging stops early as soon as a page yields fewer than
        /// <see cref="NextPageThreshold"/> results.
        /// </summary>
        /// <param name="query">The Torznab query; only its free-text search string is used.</param>
        /// <returns>The releases collected from the fetched pages.</returns>
        protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
        {
            var releases = new List<ReleaseInfo>();

            // The query string is the same for every page, so build it once.
            var searchSuffix = string.Empty;
            var searchString = query.GetQueryString().Trim();
            if (!string.IsNullOrWhiteSpace(searchString))
            {
                var parameters = new NameValueCollection();
                parameters.Set("s", NonWordRegex.Replace(searchString, " ").Trim());
                parameters.Set("tt", "1");
                searchSuffix = $"?{parameters.GetQueryString()}";
            }

            // An array (not a HashSet) because the pages must be requested in order:
            // the early exit below only makes sense when page N is fetched before page N+1.
            var pageUrls = new[]
            {
                SiteLink,
                SiteLink + "page/2/",
                SiteLink + "page/3/"
            };

            foreach (var pageUrl in pageUrls)
            {
                var response = await RequestWithCookiesAsync(pageUrl + searchSuffix);

                var pageReleases = ParseReleases(response);
                releases.AddRange(pageReleases);

                // Stop fetching the next page when less than 15 results are found.
                if (pageReleases.Count < NextPageThreshold)
                    break;
            }

            return releases;
        }

        /// <summary>
        /// Resolves a details page into a magnet link and hands it to the base downloader.
        /// </summary>
        /// <param name="link">URL of the release details page.</param>
        /// <returns>Whatever the base <c>Download</c> returns for the generated magnet link.</returns>
        /// <exception cref="Exception">
        /// The page contains no 40 hex digit info hash, or no title heading.
        /// </exception>
        public override async Task<byte[]> Download(Uri link)
        {
            var response = await RequestWithCookiesAsync(link.ToString());

            var parser = new HtmlParser();
            var dom = parser.ParseDocument(response.ContentString);

            // Only accept a well-formed hash; arbitrary cell text would yield a useless magnet.
            var hashCell = dom.QuerySelector("td:contains(\"Info Hash:\") ~ td")?.TextContent ?? string.Empty;
            var hashMatch = InfoHashRegex.Match(hashCell);
            if (!hashMatch.Success)
                throw new Exception($"Failed to fetch hash from {link}");

            var title = dom.QuerySelector("div.postTitle h1")?.TextContent.Trim();
            if (title == null)
                throw new Exception($"Failed to fetch title from {link}");

            title = StringUtil.MakeValidFileName(title, '_', false);

            var magnet = MagnetUtil.InfoHashToPublicMagnet(hashMatch.Value, title);

            return await base.Download(magnet);
        }

        /// <summary>
        /// Extracts one <c>ReleaseInfo</c> per post found in a listing page.
        /// Posts without a details link are skipped because the link doubles as the
        /// release GUID and download URL.
        /// </summary>
        /// <param name="response">The listing page response.</param>
        /// <returns>The releases found on the page, in document order.</returns>
        private List<ReleaseInfo> ParseReleases(WebResult response)
        {
            var releases = new List<ReleaseInfo>();

            var dom = ParseHtmlDocument(response.ContentString);

            var rows = dom.QuerySelectorAll("div.post:has(div[class=\"postTitle\"])");
            foreach (var row in rows)
            {
                var detailsLink = row.QuerySelector("div.postTitle h2 a")?.GetAttribute("href")?.Trim();
                if (string.IsNullOrEmpty(detailsLink))
                    continue;

                var details = ResolveUrl(detailsLink);

                var title = row.QuerySelector("div.postTitle")?.TextContent.Trim();

                var infoString = row.QuerySelector("div.postContent")?.TextContent.Trim() ?? string.Empty;

                // Append the audio format and bitrate to the title when the site provides them.
                title += FormatTitleTag(FormatRegex.Match(infoString));
                title += FormatTitleTag(BitrateRegex.Match(infoString));

                var matchSize = SizeRegex.Match(infoString);
                var size = matchSize.Groups[1].Success ? ReleaseInfo.GetBytes(matchSize.Groups[1].Value) : 0;

                // Fall back to the current time when the post carries no parsable date.
                var matchDateAdded = PostedRegex.Match(infoString);
                var publishDate = matchDateAdded.Groups[1].Success && DateTime.TryParseExact(matchDateAdded.Groups[1].Value, "d MMM yyyy", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var parsedDate) ? parsedDate : DateTime.Now;

                // Non-breaking spaces in the category text are turned into ';' and used as the list delimiter.
                var postInfo = row.QuerySelector("div.postInfo")?.FirstChild?.TextContent.Trim().Replace("\xA0", ";") ?? string.Empty;
                var matchCategory = CategoryRegex.Match(postInfo);
                var category = matchCategory.Groups[1].Success ? matchCategory.Groups[1].Value.Split(';').Select(c => c.Trim()).ToList() : new List<string>();
                var categories = category.SelectMany(MapTrackerCatDescToNewznab).Distinct().ToList();

                var release = new ReleaseInfo
                {
                    Guid = details,
                    Details = details,
                    Link = details,
                    Title = title,
                    Category = categories,
                    Size = size,
                    Seeders = 1,
                    Peers = 1,
                    PublishDate = publishDate,
                    DownloadVolumeFactor = 0,
                    UploadVolumeFactor = 1
                };

                var cover = row.QuerySelector("img[src]")?.GetAttribute("src")?.Trim();
                if (!string.IsNullOrEmpty(cover))
                    release.Poster = ResolveUrl(cover);

                releases.Add(release);
            }

            return releases;
        }

        /// <summary>
        /// Turns a regex match into a " [value]" title suffix.
        /// </summary>
        /// <param name="match">A match whose first group holds the value.</param>
        /// <returns>
        /// The suffix, or an empty string when the match failed or the value is blank
        /// or the "?" placeholder.
        /// </returns>
        private static string FormatTitleTag(Match match)
        {
            if (!match.Groups[1].Success)
                return string.Empty;

            var value = match.Groups[1].Value.Trim();

            return value.Length > 0 && value != "?" ? $" [{value}]" : string.Empty;
        }

        /// <summary>
        /// Builds an absolute URL from an href/src attribute value.
        /// </summary>
        /// <param name="url">An absolute http(s) URL, or a path relative to <c>SiteLink</c>.</param>
        /// <returns>The URL itself when it is absolute http(s), otherwise the path appended to <c>SiteLink</c>.</returns>
        private Uri ResolveUrl(string url)
        {
            // The scheme check matters: a rooted path such as "/images/x.jpg" can be
            // accepted as an absolute file URI on some platforms.
            if (Uri.TryCreate(url, UriKind.Absolute, out var absolute) &&
                (absolute.Scheme == Uri.UriSchemeHttp || absolute.Scheme == Uri.UriSchemeHttps))
                return absolute;

            // Drop leading slashes so the result does not contain "//" after SiteLink.
            return new Uri(SiteLink + url.TrimStart('/'));
        }

        /// <summary>
        /// Parses a listing page and expands its encoded posts.
        /// Every <c>div.post.re-ab</c> element is replaced by a <c>div.post</c> whose inner
        /// HTML is the UTF-8 text obtained by base64-decoding the original element's text.
        /// </summary>
        /// <param name="response">Raw HTML of the page.</param>
        /// <returns>The parsed document with encoded posts expanded.</returns>
        private static IHtmlDocument ParseHtmlDocument(string response)
        {
            var parser = new HtmlParser();
            var dom = parser.ParseDocument(response);

            var hidden = dom.QuerySelectorAll("div.post.re-ab");
            foreach (var element in hidden)
            {
                var body = dom.CreateElement<IHtmlDivElement>();
                body.ClassList.Add("post");
                body.InnerHtml = Encoding.UTF8.GetString(Convert.FromBase64String(element.TextContent));
                element.Parent.ReplaceChild(body, element);
            }

            return dom;
        }
    }
}