From a9f3be882fedab0487604b4bd56d98f212ee9c10 Mon Sep 17 00:00:00 2001 From: imprologic Date: Sat, 29 Apr 2023 12:46:18 -0400 Subject: [PATCH 1/2] Fixed timezone parsing --- lib/domain/media/media.dart | 2 +- lib/domain/rss_content.dart | 2 +- lib/util/datetime.dart | 20 ++++++--- lib/util/timezone.dart | 37 +++++++++++++++ test/datetime_test.dart | 89 +++++++++++++++++++++++++++++++++++++ test/rss_test.dart | 4 +- test/timezone_test.dart | 70 +++++++++++++++++++++++++++++ 7 files changed, 214 insertions(+), 10 deletions(-) create mode 100644 lib/util/timezone.dart create mode 100644 test/datetime_test.dart create mode 100644 test/timezone_test.dart diff --git a/lib/domain/media/media.dart b/lib/domain/media/media.dart index e95cb48..430a9a2 100644 --- a/lib/domain/media/media.dart +++ b/lib/domain/media/media.dart @@ -19,8 +19,8 @@ import 'package:webfeed/domain/media/status.dart'; import 'package:webfeed/domain/media/text.dart'; import 'package:webfeed/domain/media/thumbnail.dart'; import 'package:webfeed/domain/media/title.dart'; -import 'package:webfeed/util/xml.dart'; import 'package:webfeed/util/iterable.dart'; +import 'package:webfeed/util/xml.dart'; import 'package:xml/xml.dart'; class Media { diff --git a/lib/domain/rss_content.dart b/lib/domain/rss_content.dart index 531c9ed..f7d7c24 100644 --- a/lib/domain/rss_content.dart +++ b/lib/domain/rss_content.dart @@ -17,7 +17,7 @@ class RssContent { RssContent(this.value, this.images); factory RssContent.parse(XmlElement element) { - final dynamic? 
content = element.text; + final dynamic content = element.text; final images = []; _imagesRegExp.allMatches(content).forEach((match) { images.add(match.group(1)!); diff --git a/lib/util/datetime.dart b/lib/util/datetime.dart index 6b41fff..c7984cd 100644 --- a/lib/util/datetime.dart +++ b/lib/util/datetime.dart @@ -1,18 +1,26 @@ -import 'package:intl/intl.dart'; +import 'package:intl/intl.dart'; +import './timezone.dart'; + +/// The `Z` part is not yet implemented according to https://pub.dev/documentation/intl/latest/intl/DateFormat-class.html +/// We will remove it for now and parse the timezone separately. +const rfc822DatePattern = 'EEE, dd MMM yyyy HH:mm:ss'; +final rfc822DateFormat = DateFormat(rfc822DatePattern, 'en_US'); -const rfc822DatePattern = 'EEE, dd MMM yyyy HH:mm:ss Z'; DateTime? parseDateTime(dateString) { if (dateString == null) return null; return _parseRfc822DateTime(dateString) ?? _parseIso8601DateTime(dateString); } +/// Try to parse `dateString` as an RFC 822 date. +/// We will parse the date string as UTC and then +/// subtract the actual time offset from the parsed string. DateTime? _parseRfc822DateTime(String dateString) { try { - final num? length = dateString.length.clamp(0, rfc822DatePattern.length); - final trimmedPattern = rfc822DatePattern.substring(0, length as int?); //Some feeds use a shortened RFC 822 date, e.g. 'Tue, 04 Aug 2020' - final format = DateFormat(trimmedPattern, 'en_US'); - return format.parse(dateString); + final localTime = rfc822DateFormat.parse(dateString, true); + final timezone = dateString.trim().split(' ').last; + final timeOffset = Duration(minutes: getTimeZoneOffset(timezone) ?? 
0); + return localTime.subtract(timeOffset).toUtc(); } on FormatException { return null; } diff --git a/lib/util/timezone.dart b/lib/util/timezone.dart new file mode 100644 index 0000000..099c79f --- /dev/null +++ b/lib/util/timezone.dart @@ -0,0 +1,37 @@ +const timeZoneAbbreviations = { + 'EET': 2 * 60, + 'CET': 1 * 60, + 'GMT': 0, + 'AST': -4 * 60, + 'EST': -5 * 60, + 'EDT': -4 * 60, + 'CST': -6 * 60, + 'CDT': -5 * 60, + 'MST': -7 * 60, + 'MDT': -6 * 60, + 'PST': -8 * 60, + 'PDT': -7 * 60, +}; + +/// Test this online at https://regex101.com/r/mem3xt/1 +final offsetRegExp = RegExp(r'^(?<sign>[\+\-]?)(?<hours>\d{2})\:?(?<minutes>\d{2})$'); + + +/// Parse a potential timezone string and return the +/// time offset in minutes +int? getTimeZoneOffset(String timezone) { + // check if timezone is one of the known abbreviations + var offset = timeZoneAbbreviations[timezone.toUpperCase()]; + if (offset != null) return offset; + // check if the timezone is of type offset + final match = offsetRegExp.firstMatch(timezone); + if (match != null) { + final sign = match.namedGroup('sign') == '-' ? 
-1 : 1; + // we know <hours> and <minutes> are not null because the RexExp matched + final hours = int.parse(match.namedGroup('hours')!); + final minutes = int.parse(match.namedGroup('minutes')!); + return sign * (60 * hours + minutes); + } + + return null; +} \ No newline at end of file diff --git a/test/datetime_test.dart b/test/datetime_test.dart new file mode 100644 index 0000000..6614909 --- /dev/null +++ b/test/datetime_test.dart @@ -0,0 +1,89 @@ +import 'package:test/test.dart'; +import 'package:webfeed/util/datetime.dart'; + +void main() { + + group('RFC 822 date time', () { + + test('parse GMT date time', () { + final dateString = 'Sat, 29 Apr 2023 12:00:00 GMT'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 29, 12, 0, 0)); + }); + + + test('parse EST date time', () { + final dateString = 'Sat, 29 Apr 2023 21:22:23 EST'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 30, 2, 22, 23)); + }); + + + test('parse +0000 offset date time', () { + final dateString = 'Fri, 28 Apr 2023 23:00:57 +0000'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 28, 23, 0, 57)); + }); + + + test('parse -0000 offset date time', () { + // yes, really, I saw this format here: https://feeds.megaphone.fm/bitcoinaudible + final dateString = 'Thu, 27 Apr 2023 19:17:00 -0000'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 27, 19, 17, 0)); + }); + + test('parse +0100 offset date time', () { + final dateString = 'Fri, 28 Apr 2023 19:02:17 +0100'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 28, 18, 2, 17)); + }); + + + test('parse 
02:00 offset date time', () { + final dateString = 'Thu, 27 Apr 2023 14:30:00 02:00'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 27, 12, 30, 0)); + }); + + + test('parse -0500 offset date time', () { + final dateString = 'Thu, 27 Apr 2023 14:30:00 -0500'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 27, 19, 30, 0)); + }); + + }); + + + + group('ISO 8601 date time', () { + + test('parse +00:00 offset date time', () { + final dateString = '2023-04-24T05:02:37+00:00'; + final result = parseDateTime(dateString); + expect(result, isNotNull); + expect(result!.isUtc, true); + expect(result, DateTime.utc(2023, 4, 24, 5, 2, 37)); + }); + + }); + + + + +} \ No newline at end of file diff --git a/test/rss_test.dart b/test/rss_test.dart index fa3d789..5515164 100644 --- a/test/rss_test.dart +++ b/test/rss_test.dart @@ -73,7 +73,7 @@ void main() { expect(feed.items!.first.link, 'https://foo.bar.news/1'); expect(feed.items!.first.guid, 'https://foo.bar.news/1?guid'); expect(feed.items!.first.pubDate, - DateTime(2018, 03, 26, 14)); //Mon, 26 Mar 2018 14:00:00 PDT + DateTime.utc(2018, 03, 26, 21)); //Mon, 26 Mar 2018 14:00:00 PDT expect(feed.items!.first.categories!.first.domain, 'news'); expect(feed.items!.first.categories!.first.value, 'Lorem'); expect(feed.items!.first.author, 'alice@foo.bar.news'); @@ -104,7 +104,7 @@ void main() { expect(item.title, null); expect(item.link, 'http://www.foo.com'); expect(item.pubDate, - DateTime(2001, 08, 27, 16, 08, 56)); //Mon, 27 Aug 2001 16:08:56 PST + DateTime.utc(2001, 08, 28, 0, 08, 56)); //Mon, 27 Aug 2001 16:08:56 PST expect(item.media!.group!.contents!.length, 5); expect(item.media!.group!.credits!.length, 2); diff --git a/test/timezone_test.dart b/test/timezone_test.dart new file mode 100644 index 0000000..e572041 --- /dev/null +++ 
b/test/timezone_test.dart @@ -0,0 +1,70 @@ +import 'package:test/test.dart'; +import 'package:webfeed/util/timezone.dart'; + +main() { + + group('Abbreviated timezones', () { + + test('parse GMT timezone', () { + final offset = getTimeZoneOffset('GMT'); + expect(offset, 0); + }); + + test('parse EET timezone', () { + final offset = getTimeZoneOffset('EET'); + expect(offset, 2 * 60); + }); + + test('parse EST timezone', () { + final offset = getTimeZoneOffset('EST'); + expect(offset, -5 * 60); + }); + + }); + + + group('Offset timezones', () { + + test('parse 00:00 timezone', () { + final offset = getTimeZoneOffset('00:00'); + expect(offset, 0); + }); + + test('parse 0000 timezone', () { + final offset = getTimeZoneOffset('0000'); + expect(offset, 0); + }); + + test('parse +01:00 timezone', () { + final offset = getTimeZoneOffset('+01:00'); + expect(offset, 60); + }); + + test('parse 01:00 timezone', () { + final offset = getTimeZoneOffset('01:00'); + expect(offset, 60); + }); + + test('parse 0100 timezone', () { + final offset = getTimeZoneOffset('0100'); + expect(offset, 60); + }); + + test('parse -01:00 timezone', () { + final offset = getTimeZoneOffset('-01:00'); + expect(offset, -60); + }); + + test('parse -0100 timezone', () { + final offset = getTimeZoneOffset('-0100'); + expect(offset, -60); + }); + + test('parse -03:30 timezone', () { + final offset = getTimeZoneOffset('-03:30'); + expect(offset, -3 * 60 - 30); + }); + + }); + +} From 58f99cec529e3384d7486e9fe0ab02071918f3b4 Mon Sep 17 00:00:00 2001 From: imprologic Date: Sat, 29 Apr 2023 14:27:01 -0400 Subject: [PATCH 2/2] changed comment --- lib/util/timezone.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/util/timezone.dart b/lib/util/timezone.dart index 099c79f..2e18a32 100644 --- a/lib/util/timezone.dart +++ b/lib/util/timezone.dart @@ -13,7 +13,7 @@ const timeZoneAbbreviations = { 'PDT': -7 * 60, }; -/// Test this online at https://regex101.com/r/mem3xt/1 +/// Test this 
regexp online at https://regex101.com/r/mem3xt/1 final offsetRegExp = RegExp(r'^(?<sign>[\+\-]?)(?<hours>\d{2})\:?(?<minutes>\d{2})$');