Even more flexibility in the HTML source — JSON string extraction fallback (#2262)

This commit is contained in:
Imran Remtulla
2025-04-27 00:10:06 -04:00
parent ecf4326b47
commit 950bf28289

View File

@ -1,3 +1,5 @@
import 'dart:convert';
import 'package:easy_localization/easy_localization.dart'; import 'package:easy_localization/easy_localization.dart';
import 'package:html/parser.dart'; import 'package:html/parser.dart';
import 'package:http/http.dart'; import 'package:http/http.dart';
@ -67,6 +69,27 @@ int compareAlphaNumeric(String a, String b) {
return aParts.length.compareTo(bParts.length); return aParts.length.compareTo(bParts.length);
} }
List<String> collectAllStringsFromJSONObject(dynamic obj) {
List<String> extractor(dynamic obj) {
final results = <String>[];
if (obj is String) {
results.add(obj);
} else if (obj is List) {
for (final item in obj) {
results.addAll(extractor(item));
}
} else if (obj is Map<String, dynamic>) {
for (final value in obj.values) {
results.addAll(extractor(value));
}
}
return results;
}
return extractor(obj);
}
List<String> _splitAlphaNumeric(String s) { List<String> _splitAlphaNumeric(String s) {
List<String> parts = []; List<String> parts = [];
StringBuffer sb = StringBuffer(); StringBuffer sb = StringBuffer();
@ -95,6 +118,13 @@ bool _isNumeric(String s) {
return s.codeUnitAt(0) >= 48 && s.codeUnitAt(0) <= 57; return s.codeUnitAt(0) >= 48 && s.codeUnitAt(0) <= 57;
} }
List<MapEntry<String, String>> getLinksInLines(String lines) => RegExp(
r'(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?')
.allMatches(lines)
.map((match) =>
MapEntry(match.group(0)!, match.group(0)?.split('/').last ?? ''))
.toList();
// Given an HTTP response, grab some links according to the common additional settings // Given an HTTP response, grab some links according to the common additional settings
// (those that apply to intermediate and final steps) // (those that apply to intermediate and final steps)
Future<List<MapEntry<String, String>>> grabLinksCommon( Future<List<MapEntry<String, String>>> grabLinksCommon(
@ -114,12 +144,21 @@ Future<List<MapEntry<String, String>>> grabLinksCommon(
.map((e) => MapEntry(ensureAbsoluteUrl(e.key, res.request!.url), e.value)) .map((e) => MapEntry(ensureAbsoluteUrl(e.key, res.request!.url), e.value))
.toList(); .toList();
if (allLinks.isEmpty) { if (allLinks.isEmpty) {
allLinks = RegExp( allLinks = getLinksInLines(res.body);
r'(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?') }
.allMatches(res.body) if (allLinks.isEmpty) {
.map((match) => // Getting desperate
MapEntry(match.group(0)!, match.group(0)?.split('/').last ?? '')) try {
.toList(); var jsonStrings = collectAllStringsFromJSONObject(jsonDecode(res.body));
allLinks = getLinksInLines(jsonStrings.join('\n'));
if (allLinks.isEmpty) {
allLinks = getLinksInLines(jsonStrings.map((l) {
return ensureAbsoluteUrl(l, res.request!.url);
}).join('\n'));
}
} catch (e) {
//
}
} }
List<MapEntry<String, String>> links = []; List<MapEntry<String, String>> links = [];
bool skipSort = additionalSettings['skipSort'] == true; bool skipSort = additionalSettings['skipSort'] == true;