New: Improve size and number parsing

This commit is contained in:
Qstick
2021-10-06 21:55:35 -05:00
parent 25bb10d62b
commit 293b32ea0e
25 changed files with 103 additions and 68 deletions

View File

@@ -0,0 +1,24 @@
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Test.Framework;
namespace NzbDrone.Core.Test.ParserTests
{
    /// <summary>
    /// Verifies that <c>ParseUtil.GetBytes</c> converts human-readable size strings into byte counts.
    /// </summary>
    [TestFixture]
    public class ParseUtilFixture : CoreTest
    {
        // The MB rows repeat the same quantity with different thousands/decimal separator
        // layouts; every one of them is expected to yield the same byte count.
        [TestCase("1023.4 KB", 1047961)]
        [TestCase("1023.4 MB", 1073112704)]
        [TestCase("1,023.4 MB", 1073112704)]
        [TestCase("1.023,4 MB", 1073112704)]
        [TestCase("1 023,4 MB", 1073112704)]
        [TestCase("1.023.4 MB", 1073112704)]
        [TestCase("1023.4 GB", 1098867408896)]
        [TestCase("1023.4 TB", 1125240226709504)]
        public void should_parse_size(string stringSize, long size)
        {
            var parsedBytes = ParseUtil.GetBytes(stringSize);

            parsedBytes.Should().Be(size);
        }
    }
}

View File

@@ -405,7 +405,7 @@ namespace NzbDrone.Core.Indexers.Definitions
const string SizeSelector = ".list.down > .red";
var sizeStr = tabNode.QuerySelector(SizeSelector).TextContent;
return ReleaseInfo.GetBytes(sizeStr);
return ParseUtil.GetBytes(sizeStr);
}
private string GetReleaseLink(AngleSharp.Dom.IElement tabNode)

View File

@@ -276,7 +276,7 @@ namespace NzbDrone.Core.Indexers.Definitions
}
var sizeStr = row.QuerySelector("td:nth-of-type(6)").TextContent;
release.Size = ReleaseInfo.GetBytes(sizeStr);
release.Size = ParseUtil.GetBytes(sizeStr);
var connections = row.QuerySelector("td:nth-of-type(8)").TextContent.Trim().Split("/".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);

View File

@@ -210,7 +210,7 @@ namespace NzbDrone.Core.Indexers.Definitions
// Returns the release size in bytes for one result row: reads the text of the row's
// "div.tracker_info_left" element, takes capture group 1 of SizeInfoQueryRegex from it
// and hands the trimmed match to GetBytes.
// NOTE(review): both the ReleaseInfo.GetBytes and the ParseUtil.GetBytes return are listed here,
// presumably the removed/added pair of this diff hunk - confirm only one exists in the real file.
private long getReleaseSize(AngleSharp.Dom.IElement tr)
{
var sizeStr = tr.QuerySelector("div.tracker_info_left").TextContent;
return ReleaseInfo.GetBytes(SizeInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim());
return ParseUtil.GetBytes(SizeInfoQueryRegex.Match(sizeStr).Groups[1].Value.Trim());
}
private DateTime getReleaseDate(AngleSharp.Dom.IElement tr)

View File

@@ -242,7 +242,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var files = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(3)").TextContent);
var publishDate = DateTimeUtil.FromTimeAgo(row.QuerySelector("td:nth-child(4)").TextContent);
var size = ReleaseInfo.GetBytes(row.QuerySelector("td:nth-child(5)").FirstChild.TextContent);
var size = ParseUtil.GetBytes(row.QuerySelector("td:nth-child(5)").FirstChild.TextContent);
var grabs = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(6)").TextContent);
var seeders = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(7)").TextContent);
var leechers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(8)").TextContent);

View File

@@ -279,7 +279,7 @@ namespace NzbDrone.Core.Indexers.Definitions
release.PublishDate = DateTimeUtil.FromTimeAgo(dateStr);
var sizeStr = row.Children[4].TextContent;
release.Size = ReleaseInfo.GetBytes(sizeStr);
release.Size = ParseUtil.GetBytes(sizeStr);
release.Files = ParseUtil.CoerceInt(row.Children[2].TextContent.Trim());
release.Seeders = ParseUtil.CoerceInt(row.Children[7].TextContent.Trim());

View File

@@ -15,6 +15,7 @@ using NzbDrone.Core.Configuration;
using NzbDrone.Core.Indexers.Exceptions;
using NzbDrone.Core.IndexerSearch.Definitions;
using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Parser.Model;
using NzbDrone.Core.Validation;
@@ -342,7 +343,7 @@ namespace NzbDrone.Core.Indexers.Definitions
release.MinimumSeedTime = 172800; // 48 hours
var size = row.QuerySelector(".size").TextContent;
release.Size = ReleaseInfo.GetBytes(size);
release.Size = ParseUtil.GetBytes(size);
//22 Jul 15
var dateStr = row.QuerySelector(".added").TextContent.Replace("'", string.Empty);

View File

@@ -194,7 +194,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{
Guid = guid,
Title = parsedTitle.Groups["title"].Value,
Size = ReleaseInfo.GetBytes(string.Format("{0} {1}", size.Groups["size"].Value, size.Groups["unit"].Value)),
Size = ParseUtil.GetBytes(string.Format("{0} {1}", size.Groups["size"].Value, size.Groups["unit"].Value)),
PublishDate = publishDate,
Categories = new List<IndexerCategory> { NewznabStandardCategory.Other },
InfoUrl = infoUrl,

View File

@@ -209,7 +209,7 @@ namespace NzbDrone.Core.Indexers.Cardigann
value = release.Categories.ToString();
break;
case "size":
release.Size = ReleaseInfo.GetBytes(value);
release.Size = ParseUtil.GetBytes(value);
value = release.Size.ToString();
break;
case "leechers":

View File

@@ -361,7 +361,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var details = _settings.BaseUrl + qDetailsLink.GetAttribute("href");
var grabs = ParseUtil.CoerceInt(qGrabs.TextContent);
var leechers = ParseUtil.CoerceInt(qLeechers.TextContent);
var size = ReleaseInfo.GetBytes(sizeString);
var size = ParseUtil.GetBytes(sizeString);
var release = new TorrentInfo
{

View File

@@ -276,7 +276,7 @@ namespace NzbDrone.Core.Indexers.Definitions
//"July 11, 2015, 13:34:09", "Today|Yesterday at 20:04:23"
release.PublishDate = DateTimeUtil.FromUnknown(dateStr);
var sizeStr = row.Children[5].TextContent;
release.Size = ReleaseInfo.GetBytes(sizeStr);
release.Size = ParseUtil.GetBytes(sizeStr);
release.Seeders = ParseUtil.CoerceInt(row.Children[7].TextContent);
release.Peers = ParseUtil.CoerceInt(row.Children[8].TextContent) + release.Seeders;
var grabs = row.QuerySelector("td:nth-child(10)").TextContent;

View File

@@ -259,7 +259,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var link = new Uri(_settings.BaseUrl + row.Children[4].FirstElementChild.GetAttribute("href"));
var description = row.Children[2].QuerySelector("span").TextContent;
var size = ReleaseInfo.GetBytes(row.Children[7].TextContent);
var size = ParseUtil.GetBytes(row.Children[7].TextContent);
var dateTag = row.Children[6].FirstElementChild;
var dateString = string.Join(" ", dateTag.Attributes.Select(attr => attr.Name));

View File

@@ -304,7 +304,7 @@ namespace NzbDrone.Core.Indexers.Definitions
// Torrents - Category column == Icons
var cat = _categories.MapTrackerCatToNewznab(catIcon.GetAttribute("href").Substring(1));
var size = ReleaseInfo.GetBytes(row.Children[5].TextContent);
var size = ParseUtil.GetBytes(row.Children[5].TextContent);
var colIndex = 6;
int? files = null;

View File

@@ -280,7 +280,7 @@ namespace NzbDrone.Core.Indexers.Definitions
release.PublishDate = DateTime.ParseExact(dateString, "yyyy-MM-dd hh:mm tt", CultureInfo.InvariantCulture);
var sizeStr = row.QuerySelector("td:nth-of-type(5)").TextContent.Trim();
release.Size = ReleaseInfo.GetBytes(sizeStr);
release.Size = ParseUtil.GetBytes(sizeStr);
release.Seeders = ParseUtil.CoerceInt(row.QuerySelector("td:nth-of-type(7)").TextContent.Trim());
release.Peers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-of-type(8)").TextContent.Trim()) + release.Seeders;

View File

@@ -360,7 +360,7 @@ namespace NzbDrone.Core.Indexers.Definitions
release.Seeders = item.Seeders;
release.Peers = item.Leechers + release.Seeders;
var size = item.Size;
release.Size = ReleaseInfo.GetBytes(size);
release.Size = ParseUtil.GetBytes(size);
release.DownloadVolumeFactor = item.Free ? 0 : 1;
release.UploadVolumeFactor = 1;

View File

@@ -224,7 +224,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var link = _settings.BaseUrl + row.QuerySelector("a[href*='action=download']").GetAttribute("href");
var qColSize = row.QuerySelector("td:nth-child(3)");
var size = ReleaseInfo.GetBytes(qColSize.Children[0].TextContent);
var size = ParseUtil.GetBytes(qColSize.Children[0].TextContent);
var files = ParseUtil.CoerceInt(qColSize.Children[1].TextContent.Split(':')[1].Trim());
var qPublishdate = row.QuerySelector("td:nth-child(4) span");

View File

@@ -342,7 +342,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var dateStr = Regex.Replace(row.Children[5].InnerHtml, @"\<br[\s]{0,1}[\/]{0,1}\>", " ");
var publishDate = DateTimeUtil.FromTimeAgo(dateStr);
var files = ParseUtil.CoerceInt(row.Children[3].TextContent);
var size = ReleaseInfo.GetBytes(row.Children[7].TextContent);
var size = ParseUtil.GetBytes(row.Children[7].TextContent);
var grabs = ParseUtil.CoerceInt(row.Children[8].TextContent);
var seeders = ParseUtil.CoerceInt(row.Children[9].TextContent);
var leechers = ParseUtil.CoerceInt(row.Children[10].TextContent);

View File

@@ -281,7 +281,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var dateString = row.QuerySelector("td:nth-child(6) nobr").TextContent.Trim();
var publishDate = DateTime.ParseExact(dateString, "yyyy-MM-ddHH:mm:ss", CultureInfo.InvariantCulture);
var size = ReleaseInfo.GetBytes(row.QuerySelector("td:nth-child(7)").InnerHtml.Split('<').First().Trim());
var size = ParseUtil.GetBytes(row.QuerySelector("td:nth-child(7)").InnerHtml.Split('<').First().Trim());
var files = ParseUtil.GetLongFromString(row.QuerySelector("td:nth-child(7) > a").TextContent);
var grabs = ParseUtil.GetLongFromString(row.QuerySelector("td:nth-child(8)").TextContent);
var seeders = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(9)").TextContent);

View File

@@ -1672,7 +1672,7 @@ namespace NzbDrone.Core.Indexers.Definitions
// Returns the release size in bytes, parsed from the "data-ts_text" attribute of the
// row's "td.tor-size" cell.
// NOTE(review): "size" is declared twice here (ReleaseInfo.GetBytes and ParseUtil.GetBytes),
// presumably the removed/added pair of this diff hunk - confirm only one exists in the real file.
private long GetSizeOfRelease(in IElement row)
{
var qSize = row.QuerySelector("td.tor-size");
var size = ReleaseInfo.GetBytes(qSize.GetAttribute("data-ts_text"));
var size = ParseUtil.GetBytes(qSize.GetAttribute("data-ts_text"));
return size;
}

View File

@@ -254,7 +254,7 @@ namespace NzbDrone.Core.Indexers.Definitions
DownloadUrl = string.Format("{0}/download.php/{1}/download.torrent", _settings.BaseUrl, torrentId),
Guid = details,
PublishDate = DateTimeUtil.FromTimeAgo(qDescCol.ChildNodes.Last().TextContent),
Size = ReleaseInfo.GetBytes(sizeStr),
Size = ParseUtil.GetBytes(sizeStr),
Seeders = seeders,
Peers = ParseUtil.CoerceInt(row.Children[leechersIndex].TextContent.Trim()) + seeders,
DownloadVolumeFactor = row.QuerySelector("font > b:contains(Freeleech)") != null ? 0 : 1,

View File

@@ -256,7 +256,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var files = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(3)").TextContent);
var publishDate = DateTimeUtil.FromTimeAgo(row.QuerySelector("td:nth-child(4)").TextContent);
var size = ReleaseInfo.GetBytes(row.QuerySelector("td:nth-child(5)").FirstChild.TextContent);
var size = ParseUtil.GetBytes(row.QuerySelector("td:nth-child(5)").FirstChild.TextContent);
var grabs = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(6)").TextContent);
var seeders = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(7)").TextContent);
var leechers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(8)").TextContent);

View File

@@ -314,7 +314,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var qColumns = row.QuerySelectorAll("td");
release.Files = ParseUtil.CoerceInt(qColumns[3].TextContent);
release.PublishDate = DateTimeUtil.FromUnknown(qColumns[5].TextContent);
release.Size = ReleaseInfo.GetBytes(qColumns[6].TextContent);
release.Size = ParseUtil.GetBytes(qColumns[6].TextContent);
release.Grabs = ParseUtil.CoerceInt(qColumns[7].TextContent.Replace("Times", ""));
release.Seeders = ParseUtil.CoerceInt(qColumns[8].TextContent);
release.Peers = ParseUtil.CoerceInt(qColumns[9].TextContent) + release.Seeders;

View File

@@ -355,7 +355,7 @@ namespace NzbDrone.Core.Indexers.Definitions
var publishDateStr = row.Children[4].InnerHtml.Split('>').Last();
var publishDate = DateTime.ParseExact(publishDateStr, "dd/MM/yyyy", CultureInfo.InvariantCulture);
var size = ReleaseInfo.GetBytes(row.Children[5].TextContent.Replace(".", "").Replace(",", "."));
var size = ParseUtil.GetBytes(row.Children[5].TextContent.Replace(".", "").Replace(",", "."));
var seeders = ParseUtil.CoerceInt(row.Children[6].TextContent);
var leechers = ParseUtil.CoerceInt(row.Children[7].TextContent);
var grabs = ParseUtil.CoerceInt(row.Children[8].TextContent);

View File

@@ -99,47 +99,5 @@ namespace NzbDrone.Core.Parser.Model
return ToString();
}
}
/// <summary>
/// Parses a size string such as "1023.4 MB" into a number of bytes.
/// </summary>
/// <param name="str">Text holding a number and a unit label, in any order.</param>
/// <returns>The byte count produced by the (unit, value) overload of <c>GetBytes</c>.</returns>
/// <remarks>
/// Only digits and '.' are kept for the numeric part, so a comma is dropped
/// ("1,023.4" is read as "1023.4"). All letters of the input together form the unit text.
/// </remarks>
public static long GetBytes(string str)
{
    var numberChars = str.Where(c => c == '.' || char.IsDigit(c)).ToArray();
    var unitChars = str.Where(char.IsLetter).ToArray();

    var amount = ParseUtil.CoerceFloat(new string(numberChars));
    return GetBytes(new string(unitChars), amount);
}
/// <summary>
/// Converts a numeric size and its unit label into a number of bytes (1024-based units).
/// </summary>
/// <param name="unit">
/// Unit text such as "KB", "MiB" or "gb". Matching is case-insensitive and ignores the
/// letter "i", so "KiB" and "KIB" resolve to the same unit as "KB". Null is treated as "no unit".
/// </param>
/// <param name="value">Size expressed in <paramref name="unit"/>.</param>
/// <returns>
/// The size in bytes, truncated to a <c>long</c>. When the text contains none of
/// kb/mb/gb/tb, <paramref name="value"/> itself is truncated and returned.
/// </returns>
public static long GetBytes(string unit, float value)
{
    // Lower-case BEFORE stripping "i": string.Replace is case-sensitive, so stripping first
    // left an upper-case "I" in place and "KIB"/"MIB"/"GIB"/"TIB" fell through to the raw value.
    var normalizedUnit = (unit ?? string.Empty).ToLowerInvariant().Replace("i", string.Empty);

    if (normalizedUnit.Contains("kb"))
    {
        return BytesFromKB(value);
    }

    if (normalizedUnit.Contains("mb"))
    {
        return BytesFromMB(value);
    }

    if (normalizedUnit.Contains("gb"))
    {
        return BytesFromGB(value);
    }

    if (normalizedUnit.Contains("tb"))
    {
        return BytesFromTB(value);
    }

    // No recognised unit: the value is taken to be a byte count already.
    return (long)value;
}

/// <summary>Converts terabytes to bytes by scaling down through GB, MB and KB (factor 1024 each).</summary>
public static long BytesFromTB(float tb) => BytesFromGB(tb * 1024f);

/// <summary>Converts gigabytes to bytes by scaling down through MB and KB (factor 1024 each).</summary>
public static long BytesFromGB(float gb) => BytesFromMB(gb * 1024f);

/// <summary>Converts megabytes to bytes by scaling down through KB (factor 1024 each).</summary>
public static long BytesFromMB(float mb) => BytesFromKB(mb * 1024f);

/// <summary>Converts kilobytes to bytes (factor 1024), truncating the result to a <c>long</c>.</summary>
public static long BytesFromKB(float kb) => (long)(kb * 1024f);
}
}

View File

@@ -18,10 +18,20 @@ namespace NzbDrone.Core.Parser
/// <summary>
/// Passes <paramref name="s"/> through <c>NormalizeSpace</c> and then replaces every run of
/// whitespace characters with a single space.
/// </summary>
public static string NormalizeMultiSpaces(string s)
{
    var normalized = NormalizeSpace(s);
    return Regex.Replace(normalized, @"\s+", " ");
}
/// <summary>
/// Prepares scraped number text for parsing: applies <c>NormalizeSpace</c>, replaces every
/// "-" with "0" and removes every ",".
/// </summary>
public static string NormalizeNumber(string s)
{
    var normalized = NormalizeSpace(s);
    var dashesAsZero = normalized.Replace("-", "0");
    return dashesAsZero.Replace(",", "");
}
/// <summary>
/// Rewrites scraped number text so that it contains at most one ".", acting as the decimal separator.
/// </summary>
/// <param name="s">Raw number text; null, empty and whitespace-only input are all treated as "0".</param>
/// <returns>
/// The text with every "," turned into ".", passed through <c>NormalizeSpace</c>, every "-"
/// replaced by "0", and every "." except the last one removed.
/// </returns>
/// <remarks>
/// A lone separator is always kept as the decimal point, so "1,023" comes out as "1.023".
/// NOTE(review): "-" is replaced wherever it occurs, presumably so that a bare "-" placeholder
/// reads as zero; this also turns a leading minus sign into a "0" - confirm that is intended.
/// </remarks>
public static string NormalizeNumber(string s)
{
    // Null and whitespace-only input get the same "0" fallback as the empty string, instead of
    // throwing on s.Length (null) or slipping past the check before spaces are normalised.
    s = string.IsNullOrWhiteSpace(s) ? "0" : s.Replace(",", ".");
    s = NormalizeSpace(s).Replace("-", "0");

    // More than one dot means the earlier ones were thousands separators: keep only the last.
    var lastDot = s.LastIndexOf('.');
    if (s.IndexOf('.') != lastDot)
    {
        s = s.Substring(0, lastDot).Replace(".", string.Empty) + s.Substring(lastDot);
    }

    return s;
}
/// <summary>
/// Returns <paramref name="text"/> with every match of <c>InvalidXmlChars</c> removed;
/// null or empty input yields an empty string.
/// </summary>
public static string RemoveInvalidXmlChars(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    return InvalidXmlChars.Replace(text, "");
}
@@ -98,5 +108,47 @@ namespace NzbDrone.Core.Parser
var qs = QueryHelpers.ParseQuery(qsStr);
return qs[argument].FirstOrDefault();
}
/// <summary>
/// Parses a size string such as "1,023.4 MB" into a number of bytes.
/// </summary>
/// <param name="str">Text holding a number and a unit label, in any order.</param>
/// <returns>The byte count produced by the (unit, value) overload of <c>GetBytes</c>.</returns>
/// <remarks>
/// Digits, '.' and ',' form the numeric part, which is handed to <c>CoerceFloat</c>; all letters
/// of the input together form the unit text. Every other character, spaces included, is ignored.
/// NOTE(review): separator handling presumably happens inside CoerceFloat via NormalizeNumber - confirm.
/// </remarks>
public static long GetBytes(string str)
{
    var numberChars = str.Where(c => c == '.' || c == ',' || char.IsDigit(c)).ToArray();
    var unitChars = str.Where(char.IsLetter).ToArray();

    var amount = CoerceFloat(new string(numberChars));
    return GetBytes(new string(unitChars), amount);
}
/// <summary>
/// Converts a numeric size and its unit label into a number of bytes (1024-based units).
/// </summary>
/// <param name="unit">
/// Unit text such as "KB", "MiB" or "gb". Matching is case-insensitive and ignores the
/// letter "i", so "KiB" and "KIB" resolve to the same unit as "KB". Null is treated as "no unit".
/// </param>
/// <param name="value">Size expressed in <paramref name="unit"/>.</param>
/// <returns>
/// The size in bytes, truncated to a <c>long</c>. When the text contains none of
/// kb/mb/gb/tb, <paramref name="value"/> itself is truncated and returned.
/// </returns>
public static long GetBytes(string unit, float value)
{
    // Lower-case BEFORE stripping "i": string.Replace is case-sensitive, so stripping first
    // left an upper-case "I" in place and "KIB"/"MIB"/"GIB"/"TIB" fell through to the raw value.
    var normalizedUnit = (unit ?? string.Empty).ToLowerInvariant().Replace("i", string.Empty);

    if (normalizedUnit.Contains("kb"))
    {
        return BytesFromKB(value);
    }

    if (normalizedUnit.Contains("mb"))
    {
        return BytesFromMB(value);
    }

    if (normalizedUnit.Contains("gb"))
    {
        return BytesFromGB(value);
    }

    if (normalizedUnit.Contains("tb"))
    {
        return BytesFromTB(value);
    }

    // No recognised unit: the value is taken to be a byte count already.
    return (long)value;
}

/// <summary>Converts terabytes to bytes by scaling down through GB, MB and KB (factor 1024 each).</summary>
public static long BytesFromTB(float tb) => BytesFromGB(tb * 1024f);

/// <summary>Converts gigabytes to bytes by scaling down through MB and KB (factor 1024 each).</summary>
public static long BytesFromGB(float gb) => BytesFromMB(gb * 1024f);

/// <summary>Converts megabytes to bytes by scaling down through KB (factor 1024 each).</summary>
public static long BytesFromMB(float mb) => BytesFromKB(mb * 1024f);

/// <summary>Converts kilobytes to bytes (factor 1024), truncating the result to a <c>long</c>.</summary>
public static long BytesFromKB(float kb) => (long)(kb * 1024f);
}
}