Skip to content

Commit 3196274

Browse files
Implement rust xml parser
1 parent eea722a commit 3196274

File tree

16 files changed

+615
-177
lines changed

16 files changed

+615
-177
lines changed

Monal/Classes/HelperTools.m

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2924,12 +2924,9 @@ +(NSNumber*) dateToNSNumberSeconds:(NSDate*) date
29242924
if(xmlString == nil)
29252925
return nil;
29262926
DDLogVerbose(@"Parsing XML string produced by rust sdp parser(withInitiator=%@): %@", bool2str(initiator), xmlString);
2927-
NSXMLParser* xmlParser = [[NSXMLParser alloc] initWithData:[xmlString dataUsingEncoding:NSUTF8StringEncoding]];
2928-
[xmlParser setShouldProcessNamespaces:YES];
2929-
[xmlParser setShouldReportNamespacePrefixes:YES]; //for debugging only
2930-
[xmlParser setShouldResolveExternalEntities:NO];
2931-
[xmlParser setDelegate:delegate];
2932-
[xmlParser parse]; //blocking operation
2927+
XmlParserBridge* xmlParser = [[XmlParserBridge alloc] initWith:delegate];
2928+
NSData* xmlData = [xmlString dataUsingEncoding:NSUTF8StringEncoding];
2929+
[xmlParser feedData:xmlData.bytes withLength:xmlData.length]; //blocking operation
29332930
return retval;
29342931
}
29352932

Monal/Classes/MLBasePaser.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,27 @@
1010
#import <monalxmpp/MLXMLNode.h>
1111

1212
//stanzas
13-
#import "XMPPIQ.h"
13+
#import <monalxmpp/XMPPIQ.h>
1414
#import <monalxmpp/XMPPPresence.h>
15-
#import "XMPPMessage.h"
15+
#import <monalxmpp/XMPPMessage.h>
1616
#import <monalxmpp/XMPPDataForm.h>
1717

1818

1919
NS_ASSUME_NONNULL_BEGIN
2020

2121
typedef void (^stanza_completion_t)(MLXMLNode* _Nullable parsedStanza);
2222

23-
@interface MLBasePaser : NSObject <NSXMLParserDelegate>
23+
//Turns XML parser events into MLXMLNode objects and hands results to the completion block given at init.
//In this commit the events are delivered by XmlParserBridge (SwiftHelpers.swift) instead of NSXMLParser,
//which is why the class no longer adopts NSXMLParserDelegate.
@interface MLBasePaser : NSObject
2424

2525
//Stores the completion block and starts out with an empty element stack.
-(id) initWithCompletion:(stanza_completion_t) completion;
2626
//Replaces the element stack with a fresh, empty one.
-(void) reset;
2727

28+
//Called when the XML declaration was parsed; the version string is only written to the debug log.
-(void) parserDidStartDocument:(NSString*) xmlVersion;
29+
//Called for every opening tag: a node for the element is created and pushed onto the element stack,
//with the element that was innermost so far as its parent.
-(void) parserDidStartElement:(NSString*) elementName namespaceURI:(NSString*) namespaceURI attributes:(NSDictionary*) attributeDict;
30+
//Called for character data: the string is appended to the text collected for the innermost open element.
//Character data arriving while no element is open is reported through the completion block as a faked stream error.
-(void) parserFoundCharacters:(NSString*) string;
31+
//Called for every closing tag; takes no arguments because it always closes the innermost open element.
//Non-empty collected character data becomes the data of that element's node.
//NOTE(review): how the finished node is attached to its parent and when exactly the completion block fires
//is implemented in MLBasePaser.m; confirm there before relying on it.
-(void) parserDidEndInnermostElement;
32+
//Called when the parser failed; the error text is reported through the completion block as a faked stream error.
-(void) parserErrorOccurred:(NSString*) parseError;
33+
2834
@end
2935

3036
NS_ASSUME_NONNULL_END

Monal/Classes/MLBasePaser.m

Lines changed: 17 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
#import <monalxmpp/MLConstants.h>
1010
#import "MLBasePaser.h"
1111

12-
//#define DebugParser(...) DDLogDebug(__VA_ARGS__)
12+
// #define DebugParser(...) DDLogDebug(__VA_ARGS__)
1313
#define DebugParser(...)
14-
1514
@interface MLXMLNode()
1615
@property (atomic, readwrite) MLXMLNode* parent;
1716
-(MLXMLNode*) addChildNodeWithoutCopy:(MLXMLNode*) child;
@@ -23,7 +22,6 @@ @interface MLBasePaser ()
2322
//(the parent references of the MLXMLNodes are weak and don't hold the parents alive)
2423
NSMutableArray* _currentStack;
2524
stanza_completion_t _completion;
26-
NSMutableArray* _namespacePrefixes;
2725
}
2826
@end
2927

@@ -33,6 +31,7 @@ -(id) initWithCompletion:(stanza_completion_t) completion
3331
{
3432
self = [super init];
3533
_completion = completion;
34+
[self reset];
3635
return self;
3736
}
3837

@@ -41,26 +40,15 @@ -(void) reset
4140
_currentStack = [NSMutableArray new];
4241
}
4342

44-
-(void) parserDidStartDocument:(NSXMLParser*) parser
45-
{
46-
DDLogInfo(@"Document start");
47-
[self reset];
48-
}
49-
50-
-(void) parser:(NSXMLParser*) parser didStartMappingPrefix:(NSString*) prefix toURI:(NSString*) namespaceURI
51-
{
52-
DebugParser(@"Got new namespace prefix mapping for '%@' to '%@'...", prefix, namespaceURI);
53-
}
54-
55-
-(void) parser:(NSXMLParser*) parser didEndMappingPrefix:(NSString*) prefix
43+
//Parser event: the XML declaration of the document was read.
//xmlVersion is the version string reported by the parser; it is only written to the debug log,
//no parser state is touched here.
-(void) parserDidStartDocument:(NSString*) xmlVersion
{
    DDLogDebug(@"Document start, xml version: %@", xmlVersion);
}
5947

60-
-(void) parser:(NSXMLParser*) parser didStartElement:(NSString*) elementName namespaceURI:(NSString*) namespaceURI qualifiedName:(NSString*) qName attributes:(NSDictionary*) attributeDict
48+
-(void) parserDidStartElement:(NSString*) elementName namespaceURI:(NSString*) namespaceURI attributes:(NSDictionary*) attributeDict
6149
{
6250
NSInteger depth = [_currentStack count] + 1; //this makes the depth in here equal to the depth in didEndElement:
63-
DebugParser(@"Started element: %@ :: %@ (%@) depth %ld", elementName, namespaceURI, qName, depth);
51+
DebugParser(@"Started element: %@ :: %@ depth %ld", elementName, namespaceURI, depth);
6452

6553
//use appropriate MLXMLNode child classes for iq, message and presence stanzas
6654
MLXMLNode* newNode;
@@ -77,28 +65,30 @@ -(void) parser:(NSXMLParser*) parser didStartElement:(NSString*) elementName nam
7765
newNode = [newNode initWithElement:elementName andNamespace:namespaceURI withAttributes:attributeDict andChildren:@[] andData:nil];
7866

7967
DebugParser(@"Current stack: %@", _currentStack);
68+
DebugParser(@"New node: %@", newNode);
8069
//add new node to tree (each node needs a prototype MLXMLNode element and a mutable string to hold its future
8170
//char data added to the MLXMLNode when the xml element is closed)
8271
newNode.parent = [_currentStack lastObject][@"node"];
8372
[_currentStack addObject:@{@"node": newNode, @"charData": [NSMutableString new]}];
73+
DebugParser(@"New stack: %@", _currentStack);
8474
}
8575

86-
-(void) parser:(NSXMLParser*) parser foundCharacters:(NSString*) string
76+
//Parser event: character data was read.
//The text is appended to the mutable string collected for the innermost open element.
//Character data arriving while no element is open is logged and reported as a faked stream error.
-(void) parserFoundCharacters:(NSString*) string
{
    DebugParser(@"Got new xml character data: '%@'", string);

    //an empty stack has no last object, which means we are outside of any element
    NSDictionary* innermostEntry = [_currentStack lastObject];
    if(innermostEntry == nil)
    {
        DDLogError(@"Got xml character data outside of any element!");
        [self fakeStreamErrorWithMessage:@"Got xml character data outside of any element!"];
        return;
    }

    NSMutableString* collectedCharData = innermostEntry[@"charData"];
    [collectedCharData appendString:string];
    DebugParser(@"_currentCharData is now: '%@'", collectedCharData);
}
10090

101-
-(void) parser:(NSXMLParser*) parser didEndElement:(NSString*) elementName namespaceURI:(NSString*) namespaceURI qualifiedName:(NSString*) qName
91+
-(void) parserDidEndInnermostElement
10292
{
10393
NSInteger depth = [_currentStack count];
10494
NSDictionary* topmostStackElement = [_currentStack lastObject];
@@ -107,7 +97,7 @@ -(void) parser:(NSXMLParser*) parser didEndElement:(NSString*) elementName names
10797
if([topmostStackElement[@"charData"] length])
10898
currentNode.data = [topmostStackElement[@"charData"] copy];
10999

110-
DebugParser(@"Ended element: %@ :: %@ (%@) depth %ld", elementName, namespaceURI, qName, depth);
100+
DebugParser(@"Ended element: %@ depth %ld", currentNode.element, depth);
111101

112102
MLXMLNode* parent = currentNode.parent;
113103
if(parent)
@@ -126,31 +116,19 @@ -(void) parser:(NSXMLParser*) parser didEndElement:(NSString*) elementName names
126116
_completion(currentNode);
127117
}
128118

129-
-(void) parserDidEndDocument:(NSXMLParser*) parser
130-
{
131-
DDLogInfo(@"Document end");
132-
}
133-
134-
-(void) parser:(NSXMLParser*) parser foundIgnorableWhitespace:(NSString*) whitespaceString
119+
//Parser event: the parser reported an error.
//parseError is the error description; it is handed on unchanged and ends up in the text of a faked stream error.
-(void) parserErrorOccurred:(NSString*) parseError
{
    [self fakeStreamErrorWithMessage:parseError];
}
138123

139-
-(void) parser:(NSXMLParser*) parser parseErrorOccurred:(NSError*) parseError
140-
{
141-
DDLogError(@"XML parse error occurred: line: %ld , col: %ld desc: %@ ",(long)[parser lineNumber],
142-
(long)[parser columnNumber], [parseError localizedDescription]);
143-
[self fakeStreamError];
144-
}
145-
146-
-(void) fakeStreamError
124+
//Builds a stream error node (<error/> containing <bad-format/> containing <text/>) and passes it to the
//completion block, so that a parse failure is handled like a stream error sent by the server.
//message is appended to the fixed human readable text of the <text/> element.
-(void) fakeStreamErrorWithMessage:(NSString*) message
{
    NSString* const conditionNamespace = @"urn:ietf:params:xml:ns:xmpp-streams";
    NSString* errorText = [NSString stringWithFormat:@"Could not parse XML coming from server: %@", message];

    //build the tree from the inside out: text --> bad-format --> error
    MLXMLNode* textNode = [[MLXMLNode alloc] initWithElement:@"text" andNamespace:conditionNamespace withAttributes:@{} andChildren:@[] andData:errorText];
    MLXMLNode* badFormatNode = [[MLXMLNode alloc] initWithElement:@"bad-format" andNamespace:conditionNamespace withAttributes:@{} andChildren:@[textNode] andData:nil];
    MLXMLNode* streamErrorNode = [[MLXMLNode alloc] initWithElement:@"error" andNamespace:@"http://etherx.jabber.org/streams" withAttributes:@{} andChildren:@[badFormatNode] andData:nil];

    //fake stream error and let xmpp.m handle it
    _completion(streamErrorNode);
}
155133

156-
@end
134+
@end

Monal/Classes/SwiftHelpers.swift

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,6 @@ fileprivate extension RustVec {
466466
}
467467
}
468468

469-
extension RustString: @retroactive Error {}
470-
471469
@objcMembers
472470
public class JingleSDPBridge : NSObject {
473471
@objc(getJingleStringForSDPString:withInitiator:)
@@ -506,3 +504,55 @@ public class HtmlParserBridge : NSObject {
506504
return self.document.select(selector, attribute).intoArray().map { $0.toString() }
507505
}
508506
}
507+
508+
/// Objective-C visible wrapper around the rust XML stream parser.
/// Every event polled from the parser is translated into the matching `MLBasePaser` callback.
@objcMembers
public class XmlParserBridge : NSObject {
    var wrapped: MonalXmlStreamParserWrapper
    var delegate: MLBasePaser

    /// Creates a bridge that reports all parser events to `delegate`.
    public init(with delegate: MLBasePaser) {
        //never buffer more than 8192 bytes inside the rust parser and limit maximum
        //token length (attribute value, attribute name, element name) to 1024
        self.wrapped = MonalXmlStreamParserWrapper(8192, 1024)
        self.delegate = delegate
    }

    /// Feeds `size` bytes starting at `chunk` into the parser and dispatches every event that is available afterwards.
    /// Returns as soon as the parser asks for more data. An error thrown by the parser ends the polling and
    /// is forwarded to the delegate via `parserErrorOccurred`.
    @objc(feedData:withLength:)
    public func feed(data chunk: UnsafePointer<UInt8>, length size: Int) {
        do {
            //this is zero-copy
            self.wrapped.feed(UnsafeBufferPointer(start: chunk, count: size))
            polling: while true {
                let event = try self.wrapped.poll()
                switch event {
                    case .XmlDeclaration(let version):
                        self.delegate.parserDidStartDocument(version.toString())
                    case .Start(let element):
                        //attributes arrive as two parallel vectors (names and values)
                        let keys: [String] = element.attr_keys!.intoArray().map { $0.toString() }
                        let values: [String] = element.attr_values!.intoArray().map { $0.toString() }
                        MLAssert(keys.count == values.count, "Atrribute vectors coming from rust should have the same sizes!", [
                            "keys": keys as NSArray,
                            "values": values as NSArray,
                        ])
                        var attributes: [String:String] = [:]
                        for (index, key) in keys.enumerated() {
                            attributes[key] = values[index]
                        }
                        self.delegate.parserDidStartElement(element.name.toString(), namespaceURI:element.ns.toString(), attributes:attributes)
                    case .End:
                        self.delegate.parserDidEndInnermostElement()
                    case .Text(let text):
                        self.delegate.parserFoundCharacters(text.toString())
                    case .NeedMoreData:
                        //everything buffered so far was processed, wait for the next feed
                        break polling
                }
            }
        } catch let err as RustString {
            let message = err.toString()
            DDLogError("XML parser returned error: \(message)")
            self.delegate.parserErrorOccurred(message)
        } catch {
            DDLogError("XML parser returned UNEXPECTED error: \(String(describing:error))")
            unreachable("xml parser should never return non-string errors!")
        }
    }
}

0 commit comments

Comments
 (0)