Erai-Raws: fix indexer (#12083) resolves #4116

This commit is contained in:
6cUbi57z
2021-07-28 03:55:41 +01:00
committed by GitHub
parent 48364972cb
commit a1ee7cc13f
5 changed files with 91 additions and 115 deletions

View File

@@ -51,6 +51,10 @@ body {
width: 60px;
}
/* Applies a 1em margin to setup item labels that have no content (:empty). */
.setup-item-label:empty {
margin: 1em;
}
.setup-item-inputstring {
max-width: 255px;
}

View File

@@ -28,7 +28,7 @@
<link rel="stylesheet" type="text/css" href="../bootstrap/bootstrap.min.css?changed=2017083001">
<link rel="stylesheet" type="text/css" href="../animate.css?changed=2017083001">
<link rel="stylesheet" type="text/css" href="../css/tagify.css?changed=11662">
<link rel="stylesheet" type="text/css" href="../custom.css?changed=11662" media="only screen and (min-device-width: 480px)">
<link rel="stylesheet" type="text/css" href="../custom.css?changed=PR12083" media="only screen and (min-device-width: 480px)">
<link rel="stylesheet" type="text/css" href="../custom_mobile.css?changed=11662" media="only screen and (max-device-width: 480px)">
<link rel="stylesheet" type="text/css" href="../css/jquery.dataTables.min.css?changed=2017083001">
<link rel="stylesheet" type="text/css" href="../css/bootstrap-multiselect.css?changed=2017083001" />

View File

@@ -16,7 +16,7 @@
<link href="../bootstrap/bootstrap.min.css" rel="stylesheet">
<link href="../animate.css" rel="stylesheet">
<link href="../custom.css?changed=20200223" rel="stylesheet">
<link href="../custom.css?changed=PR12083" rel="stylesheet">
<title>Jackett</title>
</head>

View File

@@ -16,15 +16,16 @@ namespace Jackett.Common.Indexers
{
public class EraiRaws : BaseWebIndexer
{
const string RSS_PATH = "rss-all-magnet";
private readonly IReadOnlyDictionary<string, int> sizeEstimates = new Dictionary<string, int>() {
{ "1080p", 1332 }, // ~1.3GiB
{ "720p", 700 },
{ "540p", 350 }
};
const string RSS_PATH = "feed/?type=magnet";
public override string[] AlternativeSiteLinks { get; protected set; } = {
// At some point the beta site will probably replace the current one
// At that point, these can probably be re-enabled.
// "https://www.erai-raws.info/",
// "https://erairaws.nocensor.space/"
};
public override string[] LegacySiteLinks { get; protected set; } = {
"https://www.erai-raws.info/",
"https://erairaws.nocensor.space/"
};
@@ -34,7 +35,8 @@ namespace Jackett.Common.Indexers
: base(id: "erai-raws",
name: "Erai-Raws",
description: "Erai-Raws is a team release site for Anime subtitles.",
link: "https://www.erai-raws.info/",
//link: "https://www.erai-raws.info/",
link: "https://beta.erai-raws.info/",
caps: new TorznabCapabilities
{
TvSearchParams = new List<TvSearchParam>
@@ -56,7 +58,7 @@ namespace Jackett.Common.Indexers
// Add note that download stats are not available
configData.AddDynamic(
"download-stats-unavailable",
new DisplayInfoConfigurationItem("", "<p>Please note that the following stats are not available for this indexer. Default values are used instead. </p><ul><li>Size</li><li>Seeders</li><li>Leechers</li><li>Download Factor</li><li>Upload Factor</li></ul>")
new DisplayInfoConfigurationItem("", "<p>Please note that the following stats are not available for this indexer. Default values are used instead. </p><ul><li>Seeders</li><li>Leechers</li><li>Download Factor</li><li>Upload Factor</li></ul>")
);
// Config item for title detail parsing
@@ -114,6 +116,9 @@ namespace Jackett.Common.Indexers
var xmlDocument = new XmlDocument();
xmlDocument.LoadXml(result.ContentString);
var nsm = new XmlNamespaceManager(xmlDocument.NameTable);
nsm.AddNamespace("erai", "https://beta.erai-raws.info/rss-page/");
// Parse to RssFeedItems
var xmlNodes = xmlDocument.GetElementsByTagName("item");
List<RssFeedItem> feedItems = new List<RssFeedItem>();
@@ -121,7 +126,7 @@ namespace Jackett.Common.Indexers
{
var node = (XmlNode)n;
if (RssFeedItem.TryParse(node, out RssFeedItem item))
if (RssFeedItem.TryParse(nsm, node, out RssFeedItem item))
{
feedItems.Add(item);
}
@@ -153,9 +158,6 @@ namespace Jackett.Common.Indexers
continue;
}
// Run the title parser for the details link
releaseInfo.DetailsLink = new Uri(string.Format("{0}anime-list/{1}", SiteLink, titleParser.GetUrlSlug(releaseInfo.Title)));
// If enabled, perform detailed title parsing
if (IsTitleDetailParsingEnabled)
{
@@ -191,7 +193,7 @@ namespace Jackett.Common.Indexers
Category = MapTrackerCatToNewznab("1"),
// Download stats are not available through scraping so set some mock values.
Size = GetSizeEstimate(fi),
Size = fi.Size,
Seeders = 1,
Peers = 2,
DownloadVolumeFactor = 0,
@@ -200,24 +202,6 @@ namespace Jackett.Common.Indexers
}
}
/// <summary>
/// Get an estimate of the file size, in bytes, based on the release info.
/// </summary>
/// <remarks>
/// These estimates are currently only based on Quality. They will be very inaccurate for batch releases.
/// A release whose Quality is null or has no entry in <c>sizeEstimates</c> falls back to 256 MiB.
/// </remarks>
/// <param name="releaseInfo">Release whose Quality selects the estimate.</param>
/// <returns>The estimated size in bytes.</returns>
private long GetSizeEstimate(EraiRawsReleaseInfo releaseInfo)
{
    const long bytesPerMiB = 1024 * 1024;
    long sizeEstimateInMiB = 256;

    // One TryGetValue replaces the ContainsKey + indexer double lookup, and the
    // culture-invariant lowercasing keeps the dictionary key independent of the host locale.
    var quality = releaseInfo.Quality;
    if (quality != null && sizeEstimates.TryGetValue(quality.ToLowerInvariant(), out var knownEstimate))
    {
        sizeEstimateInMiB = knownEstimate;
    }

    // Convert to bytes and return
    return sizeEstimateInMiB * bytesPerMiB;
}
private static string PrefixOrDefault(string prefix, string value, string def = "")
{
if (string.IsNullOrWhiteSpace(value))
@@ -235,37 +219,55 @@ namespace Jackett.Common.Indexers
/// </summary>
private class RssFeedItem
{
public static bool TryParse(XmlNode rssItem, out RssFeedItem item)
public static bool TryParse(XmlNamespaceManager nsm, XmlNode rssItem, out RssFeedItem item)
{
var title = rssItem.SelectSingleNode("title")?.InnerText;
var link = rssItem.SelectSingleNode("link")?.InnerText;
var publishDate = rssItem.SelectSingleNode("pubDate")?.InnerText;
var size = rssItem.SelectSingleNode("erai:size", nsm)?.InnerText;
var description = rssItem.SelectSingleNode("description")?.InnerText;
var quality = rssItem.SelectSingleNode("erai:res", nsm)?.InnerText;
if (string.IsNullOrWhiteSpace(title) ||
string.IsNullOrWhiteSpace(link) ||
string.IsNullOrWhiteSpace(publishDate))
item = new RssFeedItem
{
// One of the properties was empty so fail to parse
item = null;
return false;
Title = title,
Link = link,
PublishDate = publishDate,
Size = size,
Description = description,
Quality = quality
};
return item.IsValid();
}
item = new RssFeedItem(title, link, publishDate);
return true;
}
private RssFeedItem(string title, string link, string publishDate)
private RssFeedItem()
{
Title = title;
Link = link;
PublishDate = publishDate;
// Nothing to do
}
public string Title { get; set; }
public string Link { get; }
public string Link { get; private set; }
public string PublishDate { get; }
public string PublishDate { get; private set; }
public string Size { get; private set; }
public string Description { get; private set; }
public string Quality { get; private set; }
/// <summary>
/// Report whether every field needed to process the item is populated.
/// </summary>
/// <returns>
/// true when Title, Link, PublishDate, Size and Quality all contain non-whitespace text; otherwise false.
/// </returns>
private bool IsValid()
{
    var requiredFields = new[] { Title, Link, PublishDate, Size, Quality };
    foreach (var field in requiredFields)
    {
        if (string.IsNullOrWhiteSpace(field))
        {
            // A single missing field is enough to reject the item
            return false;
        }
    }

    return true;
}
}
/// <summary>
@@ -275,10 +277,10 @@ namespace Jackett.Common.Indexers
{
public EraiRawsReleaseInfo(RssFeedItem feedItem)
{
var splitTitle = SplitQualityAndTitle(feedItem.Title);
Quality = splitTitle.quality;
Title = splitTitle.title;
Title = StripTitle(feedItem.Title);
Quality = feedItem.Quality;
Size = ReleaseInfo.GetBytes(feedItem.Size);
DetailsLink = ParseDetailsLink(feedItem.Description);
if (Uri.TryCreate(feedItem.Link, UriKind.Absolute, out Uri magnetUri))
{
@@ -291,15 +293,26 @@ namespace Jackett.Common.Indexers
}
}
private (string quality, string title) SplitQualityAndTitle(string rawTitle)
private string StripTitle(string rawTitle)
{
var match = Regex.Match(rawTitle, @"^\[(?<quality>[0-9]+[ip])\] (?<title>.*)$", RegexOptions.IgnoreCase, TimeSpan.FromMilliseconds(0.5));
if (match.Success)
{
return (match.Groups["quality"].Value, match.Groups["title"].Value);
var prefixStripped = Regex.Replace(rawTitle, "^\\[.+?\\] ", "");
var suffixStripped = Regex.Replace(prefixStripped, " \\[.+\\]", "");
return suffixStripped.Trim();
}
return (string.Empty, rawTitle);
/// <summary>
/// Extract the details page link from the first href attribute found in the description.
/// </summary>
/// <param name="description">Description text of the feed item; may be null or blank.</param>
/// <returns>
/// The absolute URI taken from the first href attribute, or null when the description is
/// null/blank, contains no href attribute, or the href value is not a valid absolute URI.
/// </returns>
private Uri ParseDetailsLink(string description)
{
    // The description is read with a null-conditional and is not part of the item validity
    // check, so it can be null here; Regex.Match throws ArgumentNullException on null input.
    if (string.IsNullOrWhiteSpace(description))
    {
        return null;
    }

    var match = Regex.Match(description, "href=\"(.+?)\"");
    if (!match.Success)
    {
        return null;
    }

    return Uri.TryCreate(match.Groups[1].Value, UriKind.Absolute, out Uri detailsLink)
        ? detailsLink
        : null;
}
public string Quality { get; }
@@ -311,6 +324,8 @@ namespace Jackett.Common.Indexers
public Uri DetailsLink { get; set; }
public DateTimeOffset? PublishDate { get; }
public long Size { get; }
}
public class TitleParser
@@ -318,16 +333,16 @@ namespace Jackett.Common.Indexers
private readonly Dictionary<string, string> DETAIL_SEARCH_SEASON = new Dictionary<string, string> {
{ " Season (?<detail>[0-9]+)", "" }, // "Season 2"
{ " (?<detail>[0-9]+)(st|nd|rd|th) Season", "" }, // "2nd Season"
{ " Part (?<detail>[0-9]+) ", " " }, // "<title> Part 2 <episode>"
{ " (?<detail>[0-9]+) ", " " } // "<title> 2 <episode>" - NOT A HYPHEN!
{ " Part (?<detail>[0-9]+) - ", " - " }, // "<title> Part 2 - <episode>"
{ " (?<detail>[0-9]+) - ", " - " } // "<title> 2 - <episode>"
};
private readonly Dictionary<string, string> DETAIL_SEARCH_EPISODE = new Dictionary<string, string> {
{ " (?<detail>[0-9]+)$", " " }, // "<title> <episode>" <end_of_title> - NOT A HYPHEN!
{ " (?<detail>[0-9]+) ", " " } // "<title> <episode> ..." - NOT A HYPHEN!
{ " - (?<detail>[0-9]+)$", " - " }, // "<title> - <episode>" <end_of_title>
{ " - (?<detail>[0-9]+) ", " - " } // "<title> - <episode> ..."
};
private const string TITLE_URL_SLUG_REGEX = @"^(?<url_slug>.+) ";
private const string TITLE_URL_SLUG_REGEX = @"^(?<url_slug>.+) -";
public string Parse(string title)
{
@@ -341,13 +356,13 @@ namespace Jackett.Common.Indexers
PrefixOrDefault("E", results.details["episode"]).Trim()
);
// If title still contains the strange hyphen, insert the identifier after it. Otherwise put it at the end.
int strangeHyphenPosition = results.strippedTitle.LastIndexOf("");
// If title still contains the hyphen, insert the identifier after it. Otherwise put it at the end.
int strangeHyphenPosition = results.strippedTitle.LastIndexOf("-");
if (strangeHyphenPosition > -1)
{
return string.Concat(
results.strippedTitle.Substring(0, strangeHyphenPosition).Trim(),
" ",
" - ",
seasonEpisodeIdentifier,
" ",
results.strippedTitle.Substring(strangeHyphenPosition + 1).Trim()
@@ -361,25 +376,6 @@ namespace Jackett.Common.Indexers
).Trim();
}
/// <summary>
/// Build a URL slug from the part of the title captured by TITLE_URL_SLUG_REGEX.
/// </summary>
/// <param name="title">Title to derive the slug from.</param>
/// <returns>
/// The captured text lower-cased, with every run of non-alphanumeric characters reduced to a
/// single hyphen and no leading or trailing hyphen; null when the title does not match.
/// </returns>
public string GetUrlSlug(string title)
{
    var slugMatch = Regex.Match(title, TITLE_URL_SLUG_REGEX, RegexOptions.IgnoreCase, TimeSpan.FromSeconds(0.5));
    if (slugMatch.Success)
    {
        var lowered = slugMatch.Groups["url_slug"].Value.ToLowerInvariant();
        var hyphenated = Regex.Replace(lowered, "[^a-zA-Z0-9]", "-");

        // Dropping the empty pieces between hyphens both trims the ends and collapses repeats
        var words = hyphenated.Split(new[] { '-' }, StringSplitOptions.RemoveEmptyEntries);
        return string.Join("-", words);
    }

    return null;
}
private static (string strippedTitle, Dictionary<string, string> details) SearchTitleForDetails(string title, Dictionary<string, Dictionary<string, string>> definition)
{
Dictionary<string, string> details = new Dictionary<string, string>();

View File

@@ -15,13 +15,6 @@ namespace Jackett.Test.Common.Indexers
var titleParser = new EraiRaws.TitleParser();
return titleParser.Parse(title);
}
// Returns the slug produced by a newly constructed TitleParser for the supplied title;
// the test case source provides the titles and the expected return values.
[TestCaseSource(typeof(UrlSlugTestData), nameof(UrlSlugTestData.TestCases))]
public string TestTitleParsing_GetUrlSlug(string title) =>
    new EraiRaws.TitleParser().GetUrlSlug(title);
}
public class TitleParserTestData
@@ -30,28 +23,11 @@ namespace Jackett.Test.Common.Indexers
{
get
{
yield return new TestCaseData("[1080p] Tokyo Revengers").Returns("[1080p] Tokyo Revengers");
yield return new TestCaseData("[1080p] Tokyo Revengers 02").Returns("[1080p] Tokyo Revengers E02");
yield return new TestCaseData("[1080p] Mairimashita! Iruma-kun 2nd Season 01").Returns("[1080p] Mairimashita! Iruma-kun S2E01");
yield return new TestCaseData("[540p] Seijo no Maryoku wa Bannou Desu 02 v2 (Multi)").Returns("[540p] Seijo no Maryoku wa Bannou Desu E02 v2 (Multi)");
yield return new TestCaseData("[1080p] Yuukoku no Moriarty Part 2 01 (Multi)").Returns("[1080p] Yuukoku no Moriarty S2E01 (Multi)");
}
}
}
public class UrlSlugTestData
{
public static IEnumerable TestCases
{
get
{
yield return new TestCaseData("Tokyo Revengers 02").Returns("tokyo-revengers");
yield return new TestCaseData("Mairimashita! Iruma-kun 2nd Season 01").Returns("mairimashita-iruma-kun-2nd-season");
yield return new TestCaseData("Seijo no Maryoku wa Bannou Desu 02 v2 (Multi)").Returns("seijo-no-maryoku-wa-bannou-desu");
yield return new TestCaseData("Yuukoku no Moriarty Part 2 01 (Multi)").Returns("yuukoku-no-moriarty-part-2");
yield return new TestCaseData("Maou-sama Retry! 12 END ").Returns("maou-sama-retry");
yield return new TestCaseData("Baki (2018) 01 ~ 26 ").Returns("baki-2018");
yield return new TestCaseData("Free!: Dive to the Future 01 ~ 26 ").Returns("free-dive-to-the-future");
yield return new TestCaseData("Tokyo Revengers").Returns("Tokyo Revengers");
yield return new TestCaseData("Tokyo Revengers - 02").Returns("Tokyo Revengers - E02");
yield return new TestCaseData("Mairimashita! Iruma-kun 2nd Season - 01").Returns("Mairimashita! Iruma-kun - S2E01");
yield return new TestCaseData("Seijo no Maryoku wa Bannou Desu - 02 v2 (Multi)").Returns("Seijo no Maryoku wa Bannou Desu - E02 v2 (Multi)");
yield return new TestCaseData("Yuukoku no Moriarty Part 2 - 01 (Multi)").Returns("Yuukoku no Moriarty - S2E01 (Multi)");
}
}
}