Frameworks/AutoHyperlinks Framework/Source/AHHyperlinkScanner.m
author Stephen Holt <sholt@adium.im>
Wed Aug 12 13:32:55 2009 -0400 (2009-08-12)
changeset 2602 19c704d73e7f
parent 2528 6b768a0be2db
child 2603 aac234118c92
permissions -rw-r--r--
Add em and en dashes to the start set. Fixes #11490
David@0
     1
/*
David@0
     2
 * The AutoHyperlinks Framework is the legal property of its developers (DEVELOPERS), whose names are listed in the
David@0
     3
 * copyright file included with this source distribution.
David@0
     4
 *
David@0
     5
 * Redistribution and use in source and binary forms, with or without
David@0
     6
 * modification, are permitted provided that the following conditions are met:
David@0
     7
 *     * Redistributions of source code must retain the above copyright
David@0
     8
 *       notice, this list of conditions and the following disclaimer.
David@0
     9
 *     * Redistributions in binary form must reproduce the above copyright
David@0
    10
 *       notice, this list of conditions and the following disclaimer in the
David@0
    11
 *       documentation and/or other materials provided with the distribution.
David@0
    12
 *     * Neither the name of the AutoHyperlinks Framework nor the
David@0
    13
 *       names of its contributors may be used to endorse or promote products
David@0
    14
 *       derived from this software without specific prior written permission.
David@0
    15
 *
David@0
    16
 * THIS SOFTWARE IS PROVIDED BY ITS DEVELOPERS ``AS IS'' AND ANY
David@0
    17
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
David@0
    18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
David@0
    19
 * DISCLAIMED. IN NO EVENT SHALL ITS DEVELOPERS BE LIABLE FOR ANY
David@0
    20
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
David@0
    21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
David@0
    22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
David@0
    23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
David@0
    24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
David@0
    25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
David@0
    26
 */
David@0
    27
David@0
    28
#import "AHHyperlinkScanner.h"
David@0
    29
#import "AHLinkLexer.h"
David@0
    30
#import "AHMarkedHyperlink.h"
David@0
    31
David@0
    32
#define	DEFAULT_URL_SCHEME	@"http://"
David@0
    33
#define ENC_INDEX_KEY @"encIndex"
David@0
    34
#define ENC_CHAR_KEY @"encChar"
David@0
    35
David@0
    36
// Private helper methods; their implementations live in the main @implementation in this file.
@interface AHHyperlinkScanner (PRIVATE)
David@0
    37
// Returns a range describing a balanced enclosure pair found while walking inRange of the scan string.
- (NSRange)_longestBalancedEnclosureInRange:(NSRange)inRange;
David@0
    38
// Scans inString from *idx up to (not including) the next character in inCharSet; advances *idx.
- (BOOL)_scanString:(NSString *)inString upToCharactersFromSet:(NSCharacterSet *)inCharSet intoRange:(NSRange *)outRangeRef fromIndex:(unsigned long *)idx;
David@0
    39
// Scans inString from *idx across a run of characters that are all in inCharSet; advances *idx.
- (BOOL)_scanString:(NSString *)inString charactersFromSet:(NSCharacterSet *)inCharSet intoRange:(NSRange *)outRangeRef fromIndex:(unsigned long *)idx;
David@0
    40
@end
David@0
    41
David@0
    42
@implementation AHHyperlinkScanner
David@0
    43
#pragma mark static variables
David@0
    44
	// Shared lookup tables; all of them are populated once in +initialize.
	// skipSet: whitespace/newlines, illegal and control characters, plus '<' and '>'; these separate candidates.
	static NSCharacterSet	*skipSet = nil;
David@0
    45
	// endSet: trailing characters that -nextURI may trim off the end of a candidate.
	static NSCharacterSet	*endSet = nil;
David@0
    46
	// startSet: whitespace and punctuation (including em/en dashes) skipped before -nextURI starts scanning.
	static NSCharacterSet	*startSet = nil;
David@0
    47
	// puncSet: punctuation -nextURI searches for to decide where to resume after a rejected candidate.
	static NSCharacterSet	*puncSet = nil;
David@0
    48
	// hostnameComponentSeparatorSet: "." and "/", used to isolate the first hostname component.
	static NSCharacterSet	*hostnameComponentSeparatorSet = nil;
David@0
    49
	// enclosureStartArray: the opening enclosure strings "(", "[", "{".
	static NSArray			*enclosureStartArray = nil;
David@0
    50
	// enclosureSet: every opening and closing enclosure character.
	static NSCharacterSet	*enclosureSet = nil;
David@0
    51
	// enclosureStopArray: the closing strings ")", "]", "}", index-aligned with enclosureStartArray.
	static NSArray			*enclosureStopArray = nil;
David@0
    52
	// encKeys: dictionary keys (ENC_INDEX_KEY, ENC_CHAR_KEY) used when tracking open enclosures.
	static NSArray			*encKeys = nil;
David@0
    53
	
David@0
    54
#pragma mark Class Methods
David@0
    55
/*!
 * @brief Convenience constructor for a non-strict scanner over a plain string.
 * @param inString The text to scan for hyperlinks.
 * @return An autoreleased scanner created with strict checking disabled.
 */
+ (id)hyperlinkScannerWithString:(NSString *)inString
{
	id newScanner = [[self alloc] initWithString:inString usingStrictChecking:NO];

	return [newScanner autorelease];
}
David@0
    59
David@0
    60
/*!
 * @brief Convenience constructor for a strict scanner over a plain string.
 * @param inString The text to scan for hyperlinks.
 * @return An autoreleased scanner created with strict checking enabled.
 */
+ (id)strictHyperlinkScannerWithString:(NSString *)inString
{
	id newScanner = [[self alloc] initWithString:inString usingStrictChecking:YES];

	return [newScanner autorelease];
}
David@0
    64
David@0
    65
/*!
 * @brief Convenience constructor for a non-strict scanner over an attributed string.
 * @param inString The attributed text to scan for hyperlinks.
 * @return An autoreleased scanner created with strict checking disabled.
 */
+ (id)hyperlinkScannerWithAttributedString:(NSAttributedString *)inString
{
	id newScanner = [[self alloc] initWithAttributedString:inString usingStrictChecking:NO];

	return [newScanner autorelease];
}
David@0
    69
David@0
    70
/*!
 * @brief Convenience constructor for a strict scanner over an attributed string.
 * @param inString The attributed text to scan for hyperlinks.
 * @return An autoreleased scanner created with strict checking enabled.
 */
+ (id)strictHyperlinkScannerWithAttributedString:(NSAttributedString *)inString
{
	// The "strict" factory must request strict checking, matching +strictHyperlinkScannerWithString:.
	return [[[[self class] alloc] initWithAttributedString:inString usingStrictChecking:YES] autorelease];
}
David@0
    74
Evan@680
    75
#pragma mark Initialization
Evan@680
    76
/*!
 * @brief Builds the character sets and enclosure tables shared by every scanner.
 *
 * Only the AHHyperlinkScanner class itself populates the tables, and each table is created
 * only if it is still nil, so repeated invocations leave existing tables untouched.
 */
+ (void)initialize
{
	if (self != [AHHyperlinkScanner class]) return;

	// Characters that separate candidate tokens.
	if (!skipSet) {
		NSMutableCharacterSet *builder = [[NSMutableCharacterSet alloc] init];
		[builder formUnionWithCharacterSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
		[builder formUnionWithCharacterSet:[NSCharacterSet illegalCharacterSet]];
		[builder formUnionWithCharacterSet:[NSCharacterSet controlCharacterSet]];
		[builder addCharactersInString:@"<>"];
		// Freeze the contents into an immutable set via its bitmap.
		skipSet = [[NSCharacterSet characterSetWithBitmapRepresentation:[builder bitmapRepresentation]] retain];
		[builder release];
	}

	// Trailing characters that may be trimmed from a candidate.
	if (!endSet)
		endSet = [[NSCharacterSet characterSetWithCharactersInString:@"\"',:;>)]}.?!@"] retain];

	// Leading characters skipped before scanning starts (0x2014 and 0x2013 are the em and en dashes).
	if (!startSet) {
		NSMutableCharacterSet *builder = [[NSMutableCharacterSet alloc] init];
		NSString *leadingPunctuation = [NSString stringWithFormat:@"\"'.,:;<?!-@%C%C", 0x2014, 0x2013];
		[builder formUnionWithCharacterSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
		[builder addCharactersInString:leadingPunctuation];
		startSet = [[NSCharacterSet characterSetWithBitmapRepresentation:[builder bitmapRepresentation]] retain];
		[builder release];
	}

	if (!puncSet)
		puncSet = [[NSCharacterSet characterSetWithCharactersInString:@"\"'.,:;<?!"] retain];

	if (!hostnameComponentSeparatorSet)
		hostnameComponentSeparatorSet = [[NSCharacterSet characterSetWithCharactersInString:@"./"] retain];

	// Openers and closers are index-aligned: "(" pairs with ")", "[" with "]", "{" with "}".
	if (!enclosureStartArray)
		enclosureStartArray = [[NSArray alloc] initWithObjects:@"(", @"[", @"{", nil];

	if (!enclosureSet)
		enclosureSet = [[NSCharacterSet characterSetWithCharactersInString:@"()[]{}"] retain];

	if (!enclosureStopArray)
		enclosureStopArray = [[NSArray alloc] initWithObjects:@")", @"]", @"}", nil];

	// Keys of the dictionaries that record each open enclosure's index and character.
	if (!encKeys)
		encKeys = [[NSArray alloc] initWithObjects:ENC_INDEX_KEY, ENC_CHAR_KEY, nil];
}
Evan@680
   126
Evan@680
   127
#pragma mark Init/Dealloc
Evan@680
   128
Evan@680
   129
Evan@680
   130
/*!
 * @brief Initializes a scanner over a plain string.
 * @param inString The text to scan. A copy is stored, so later mutation of a mutable
 *                 argument cannot invalidate the cached length or scan indexes.
 * @param flag YES to accept only fully specified URIs; NO to also accept degenerate ones.
 * @return The initialized scanner, positioned at index 0.
 */
- (id)initWithString:(NSString *)inString usingStrictChecking:(BOOL)flag
{
	if((self = [self init])){
		// -copy on an immutable string is effectively a retain; on a mutable one it snapshots the contents.
		m_scanString = [inString copy];
		m_scanAttrString = nil;
		// Maps a leading hostname component to the scheme to prepend for degenerate URLs.
		m_urlSchemes = [[NSDictionary alloc] initWithObjectsAndKeys:
			@"ftp://", @"ftp",
			nil];
		m_strictChecking = flag;
		m_scanLocation = 0;
		m_scanStringLength = [m_scanString length];
	}
	return self;
}
Evan@680
   144
Evan@680
   145
/*!
 * @brief Initializes a scanner over an attributed string.
 * @param inString The attributed text to scan. A copy is stored, and the plain scan string
 *                 is taken from that copy, so later mutation of a mutable argument cannot
 *                 invalidate the cached length or scan indexes.
 * @param flag YES to accept only fully specified URIs; NO to also accept degenerate ones.
 * @return The initialized scanner, positioned at index 0.
 */
- (id)initWithAttributedString:(NSAttributedString *)inString usingStrictChecking:(BOOL)flag
{
	if((self = [self init])){
		// Snapshot the attributed string first, then derive the plain string from the snapshot
		// so the two can never disagree.
		m_scanAttrString = [inString copy];
		m_scanString = [[m_scanAttrString string] copy];
		// Maps a leading hostname component to the scheme to prepend for degenerate URLs.
		m_urlSchemes = [[NSDictionary alloc] initWithObjectsAndKeys:
			@"ftp://", @"ftp",
			nil];
		m_strictChecking = flag;
		m_scanLocation = 0;
		m_scanStringLength = [m_scanString length];
	}
	return self;
}
David@0
   159
David@0
   160
/*!
 * @brief Releases the objects owned by the scanner.
 */
- (void)dealloc
{
	// Messaging nil is a no-op, so the attributed string needs no explicit nil check.
	[m_scanAttrString release];
	[m_urlSchemes release];
	[m_scanString release];

	[super dealloc];
}
David@0
   167
David@0
   168
#pragma mark URI Verification
David@0
   169
David@0
   170
/*!
 * @brief Reports whether the scanner's entire string is a single valid URI.
 * @return The result of +isStringValidURI:usingStrict:fromIndex:withStatus: for the whole
 *         scan string, using this scanner's strictness setting.
 */
- (BOOL)isValidURI
{
	// No index to advance and no status wanted, so both out-pointers are NULL.
	BOOL wholeStringIsURI = [AHHyperlinkScanner isStringValidURI:m_scanString
													 usingStrict:m_strictChecking
													   fromIndex:NULL
													  withStatus:NULL];
	return wholeStringIsURI;
}
David@0
   174
David@0
   175
/*!
 * @brief Runs the flex lexer over a string and reports whether the whole string is a URI.
 * @param inString The candidate text.
 * @param useStrictChecking YES to reject degenerate URIs (those without a scheme specifier).
 * @param index Optional. When non-NULL, it is advanced by the number of bytes flex matched,
 *              whether or not the string is accepted.
 * @param validStatus Optional. When non-NULL, receives the status returned by the lexer
 *                    (AH_URL_INVALID if the string could not be encoded).
 * @return YES only when the lexer status is acceptable under the strictness setting AND the
 *         match covers the entire encoded string; NO otherwise.
 */
+ (BOOL)isStringValidURI:(NSString *)inString usingStrict:(BOOL)useStrictChecking fromIndex:(unsigned long *)index withStatus:(AH_URI_VERIFICATION_STATUS *)validStatus
{
	AH_BUFFER_STATE				 buf;			// buffer for flex to scan from
	yyscan_t					 scanner;		// pointer to the flex scanner opaque type
	const char					*inStringEnc;
	unsigned long				 encodedLength;
	unsigned long				 matchedLength;
	// Function-scope fallback so the pointer below stays valid for the whole method.
	AH_URI_VERIFICATION_STATUS	 fallbackStatus = AH_URL_INVALID;

	if(!validStatus) validStatus = &fallbackStatus;

	*validStatus = AH_URL_INVALID; // assume the URL is invalid

	// Find the fastest 8-bit wide encoding possible for the c string;
	// fall back to UTF-8 when the fastest encoding uses more than one byte for a space.
	NSStringEncoding stringEnc = [inString fastestEncoding];
	if([@" " lengthOfBytesUsingEncoding:stringEnc] > 1U)
		stringEnc = NSUTF8StringEncoding;

	if(!(inStringEnc = [inString cStringUsingEncoding:stringEnc])) {
		return NO;
	}

	encodedLength = strlen(inStringEnc); // length in bytes of the encoded c string

	// initialize the buffer (flex automatically switches to the buffer in this function)
	AHlex_init(&scanner);
	buf = AH_scan_string(inStringEnc, scanner);

	// call flex to parse the input, and record how much it matched before tearing anything down
	*validStatus = AHlex(scanner);
	matchedLength = AHget_leng(scanner);
	if(index) *index += matchedLength;

	// flex is no longer needed; release its buffer and the scanner exactly once.
	AH_delete_buffer(buf, scanner);
	AHlex_destroy(scanner);

	BOOL isCompleteURI = (*validStatus == AH_URL_VALID || *validStatus == AH_MAILTO_VALID || *validStatus == AH_FILE_VALID);
	// degenerate URLs (A.K.A. URIs sans specifiers) require strict checking to be NO.
	BOOL isAllowedDegenerateURI = ((*validStatus == AH_URL_DEGENERATE || *validStatus == AH_MAILTO_DEGENERATE) && !useStrictChecking);

	// if it isn't valid, and it isn't an allowed degenerate, then it's invalid.
	if(!isCompleteURI && !isAllowedDegenerateURI) return NO;

	// check that the whole string was matched by flex.
	// this prevents silly things like "blah...com" from being seen as links
	return (matchedLength == encodedLength);
}
David@0
   239
David@0
   240
#pragma mark Accessors
David@0
   241
David@0
   242
/*!
 * @brief Finds the next hyperlink at or after the current scan location.
 *
 * Splits the scan string into candidates separated by skipSet characters, strips a matching
 * pair of enclosure characters and trailing punctuation from each candidate, and validates
 * what is left with the flex lexer. Degenerate matches get a scheme prepended
 * ("http://", "ftp://" for an "ftp" first component, or "mailto:").
 *
 * @return A marked hyperlink for the first acceptable candidate, or nil when no further link
 *         is found (the scan location is then moved to the end of the string).
 *         NOTE(review): nil is also returned when a validated candidate yields no NSURL, which
 *         ends the loops in -allURIs and -linkifiedString early -- confirm this is intended.
 */
- (AHMarkedHyperlink *)nextURI
{
	NSRange			scannedRange;
	unsigned long	scannedLocation = m_scanLocation;

	// skip leading startSet characters (whitespace and punctuation) so that we don't unnecessarily confuse flex
	// otherwise we end up validating urls that look like this "http://www.adiumx.com/ <--cool"
	[self _scanString:m_scanString charactersFromSet:startSet intoRange:NULL fromIndex:&scannedLocation];

	// main scanning loop
	while([self _scanString:m_scanString upToCharactersFromSet:skipSet intoRange:&scannedRange fromIndex:&scannedLocation]) {
		BOOL foundUnpairedEnclosureCharacter = NO;

		// Check for and filter enclosures.  We can't add (, [, etc. to the skipSet as they may be in a URI
		if([enclosureSet characterIsMember:[m_scanString characterAtIndex:scannedRange.location]]){
			unsigned long encIdx = [enclosureStartArray indexOfObject:[m_scanString substringWithRange:NSMakeRange(scannedRange.location, 1)]];
			NSRange encRange;
			if(NSNotFound != encIdx) {
				encRange = [m_scanString rangeOfString:[enclosureStopArray objectAtIndex:encIdx] options:NSBackwardsSearch range:scannedRange];
				if(NSNotFound != encRange.location){
					// A closer exists in the candidate: drop the first and last characters.
					scannedRange.location++; scannedRange.length -= 2;
				}else{
					foundUnpairedEnclosureCharacter = YES;
				}
			}
		}
		if(!scannedRange.length) break;

		// Find balanced enclosure chars
		NSRange longestEnclosure = [self _longestBalancedEnclosureInRange:scannedRange];
		while (scannedRange.length > 2 && [endSet characterIsMember:[m_scanString characterAtIndex:(scannedRange.location + scannedRange.length - 1)]]) {
			// Both ranges are expressed in m_scanString indexes, so compare their end positions:
			// trim the trailing character only while it lies beyond the balanced enclosure.
			if(NSMaxRange(longestEnclosure) < NSMaxRange(scannedRange)){
				scannedRange.length--;
				foundUnpairedEnclosureCharacter = NO;
			}else break;
		}

		// if we have a valid URL then save the scanned string, and make an AHMarkedHyperlink out of it.
		// this way, we can preserve things like the matched string (to be converted to a NSURL),
		// parent string, its validation status (valid, file, degenerate, etc), and its range in the parent string
		AH_URI_VERIFICATION_STATUS	 validStatus;
		NSString					*_scanString = nil;
		if(3 < scannedRange.length) _scanString = [m_scanString substringWithRange:scannedRange];

		// Candidates of three characters or fewer are never validated.
		// The validator advances m_scanLocation by the number of bytes flex matched.
		if((3 < scannedRange.length) && [[self class] isStringValidURI:_scanString usingStrict:m_strictChecking fromIndex:&m_scanLocation withStatus:&validStatus]){
			AHMarkedHyperlink	*markedLink;

			//insert typical specifiers if the URL is degenerate
			switch(validStatus){
				case AH_URL_DEGENERATE:
				{
					NSString *scheme = DEFAULT_URL_SCHEME;
					unsigned long i = 0;

					// Start as "not found": the helper leaves the range untouched when it scans nothing.
					NSRange  firstComponent = NSMakeRange(NSNotFound, 0);
					[self		  _scanString:_scanString
						upToCharactersFromSet:hostnameComponentSeparatorSet
									intoRange:&firstComponent
									fromIndex:&i];

					if(NSNotFound != firstComponent.location) {
						// A first component such as "ftp" selects a non-default scheme.
						NSString *hostnameScheme = [m_urlSchemes objectForKey:[_scanString substringWithRange:firstComponent]];
						if(hostnameScheme) scheme = hostnameScheme;
					}

					_scanString = [scheme stringByAppendingString:_scanString];

					break;
				}

				case AH_MAILTO_DEGENERATE:
					_scanString = [@"mailto:" stringByAppendingString:_scanString];
					break;
				default:
					break;
			}

			//make a marked link
			markedLink = [[[AHMarkedHyperlink alloc] initWithString:_scanString
											  withValidationStatus:validStatus
													  parentString:m_scanString
														  andRange:scannedRange] autorelease];
			return [markedLink URL]? markedLink : nil;
		}

		//step location after scanning a string
		if (foundUnpairedEnclosureCharacter){
			m_scanLocation++;
		}else{
			// Resume just past the first punctuation character in the candidate, if there is one.
			NSRange startRange = [m_scanString rangeOfCharacterFromSet:puncSet options:NSLiteralSearch range:scannedRange];
			if (startRange.location != NSNotFound)
				m_scanLocation = startRange.location + startRange.length;
			else
				m_scanLocation += scannedRange.length;
		}

		scannedLocation = m_scanLocation;
	}

	// if we're here, then the scan hit the end of the string (or an empty candidate)
	// set the scan location to the string length so we avoid potential infinite looping with many trailing spaces.
	m_scanLocation = m_scanStringLength;
	return nil;
}
David@0
   346
David@0
   347
/*!
 * @brief Collects every hyperlink -nextURI finds when scanning from the start of the string.
 * @return An array of AHMarkedHyperlink objects (empty when none are found). The scanner's
 *         scan location is restored to its previous value before returning.
 */
-(NSArray *)allURIs
{
	NSMutableArray		*foundLinks = [NSMutableArray array];
	AHMarkedHyperlink	*link;
	unsigned long		 savedLocation = m_scanLocation; // remembered so the caller's position survives

	// rescan from the very beginning, gathering links until -nextURI reports none left.
	m_scanLocation = 0;
	for(link = [self nextURI]; link; link = [self nextURI]){
		[foundLinks addObject:link];
	}

	m_scanLocation = savedLocation;
	return foundLinks;
}
David@0
   361
catfish@2528
   362
/*!
 * @brief Produces an attributed string with NSLinkAttributeName applied to every link found.
 * @return When at least one link is found, a mutable copy of the source text carrying the
 *         link attributes. Otherwise the original attributed string if the scanner has one,
 *         or a new attributed string built from the plain scan string.
 *         The scanner's scan location is restored to its previous value before returning.
 */
-(NSAttributedString *)linkifiedString
{
	NSMutableAttributedString	*result;
	AHMarkedHyperlink			*link;
	BOOL						 sawLink = NO;
	unsigned long				 savedLocation = m_scanLocation; // remembered so the caller's position survives

	m_scanLocation = 0;

	// start from the attributed source when there is one, so existing attributes are kept.
	result = (m_scanAttrString
			  ? [[m_scanAttrString mutableCopy] autorelease]
			  : [[[NSMutableAttributedString alloc] initWithString:m_scanString] autorelease]);

	// for each AHMarkedHyperlink, add the proper URL to the proper range in the string.
	for(link = [self nextURI]; link; link = [self nextURI]) {
		NSURL *linkURL = [link URL];
		sawLink = YES;
		if(linkURL){
			[result addAttribute:NSLinkAttributeName
						   value:linkURL
						   range:[link range]];
		}
	}

	m_scanLocation = savedLocation;

	if(sawLink) return result;

	// nothing was linkified: hand back the untouched source text.
	if(m_scanAttrString) return [[m_scanAttrString retain] autorelease];
	return [[[NSMutableAttributedString alloc] initWithString:m_scanString] autorelease];
}
David@0
   393
David@0
   394
/*!
 * @brief The index in the scan string from which the next -nextURI call starts.
 */
-(unsigned long)scanLocation
{
	unsigned long currentLocation = m_scanLocation;
	return currentLocation;
}
David@0
   398
David@0
   399
/*!
 * @brief Moves the position from which the next -nextURI call starts.
 * @param location The new index into the scan string. No bounds check is performed here.
 *        NOTE(review): the parameter is unsigned int while -scanLocation returns
 *        unsigned long -- confirm against the header before widening.
 */
- (void)setScanLocation:(unsigned int)location
{
	unsigned int newLocation = location;
	m_scanLocation = newLocation;
}
David@0
   403
David@0
   404
#pragma mark Below Here There Be Private Methods
David@0
   405
David@0
   406
/*!
 * @brief Looks for balanced enclosure pairs -- (), [], {} -- inside a range of the scan string.
 *
 * Walks the range keeping a list of unmatched openers. Each closer is paired with the
 * earliest unmatched opener of the same kind. Despite the method's name, the range reported
 * is that of the pair completed last during the walk.
 *
 * @param inRange The range of m_scanString to inspect.
 * @return The range (in m_scanString indexes, opener through closer inclusive) of the pair
 *         completed last, or {0, 0} when no balanced pair exists.
 */
- (NSRange)_longestBalancedEnclosureInRange:(NSRange)inRange
{
	NSMutableArray	*enclosureStack = nil;
	NSString		*matchChar = nil;
	NSDictionary	*encDict;
	NSRange			 lastBalancedRange = NSMakeRange(0, 0);
	unsigned long	 encScanLocation = inRange.location;
	unsigned long	 rangeEnd = inRange.location + inRange.length;

	while(encScanLocation < rangeEnd) {
		// advance to the next enclosure character (the helper also stops at skipSet characters)
		[self _scanString:m_scanString upToCharactersFromSet:enclosureSet intoRange:NULL fromIndex:&encScanLocation];

		if(encScanLocation >= rangeEnd) break;

		matchChar = [m_scanString substringWithRange:NSMakeRange(encScanLocation, 1)];

		if([enclosureStartArray containsObject:matchChar]) {
			// remember where this opener sits and which character it is
			encDict = [NSDictionary	dictionaryWithObjects:[NSArray arrayWithObjects:[NSNumber numberWithUnsignedLong:encScanLocation], matchChar, nil]
												forKeys:encKeys];
			if(!enclosureStack) enclosureStack = [NSMutableArray arrayWithCapacity:1];
			[enclosureStack addObject:encDict];
		}else if([enclosureStopArray containsObject:matchChar]) {
			// The characters are freshly created substrings, so they must be looked up by
			// equality; an identity lookup never finds them and would pair any closer with any opener.
			unsigned long encStopIndex = [enclosureStopArray indexOfObject:matchChar];
			NSEnumerator *encEnumerator = [enclosureStack objectEnumerator];
			while ((encDict = [encEnumerator nextObject])) {
				unsigned long encTagIndex = [(NSNumber *)[encDict objectForKey:ENC_INDEX_KEY] unsignedLongValue];
				unsigned long encStartIndex = [enclosureStartArray indexOfObject:[encDict objectForKey:ENC_CHAR_KEY]];
				if(encStopIndex == encStartIndex) {
					lastBalancedRange = NSMakeRange(encTagIndex, encScanLocation - encTagIndex + 1);
					// safe to mutate here only because enumeration stops immediately afterwards
					[enclosureStack removeObject:encDict];
					break;
				}
			}
		}

		// step past the character that was just examined
		encScanLocation++;
	}
	return lastBalancedRange;
}
David@0
   445
David@0
   446
// functional replacement for -[NSScanner scanUpToCharactersFromSet:intoString:]
David@0
   447
/*!
 * @brief Scans forward until a character from inCharSet (or skipSet) is reached.
 *
 * Leading skipSet characters are consumed first and are not part of the reported range.
 *
 * @param inString The string to scan.
 * @param inCharSet Characters that terminate the scan.
 * @param outRangeRef Optional. Receives the scanned range; written only when YES is returned.
 * @param idx In: where to start. Out: the index at which scanning stopped. Left untouched
 *            when it already lies at or beyond the end of inString.
 * @return YES when at least one character was scanned, NO otherwise.
 */
- (BOOL)_scanString:(NSString *)inString upToCharactersFromSet:(NSCharacterSet *)inCharSet intoRange:(NSRange *)outRangeRef fromIndex:(unsigned long *)idx
{
	unsigned long	stringLength = [inString length];
	unsigned long	cursor = *idx;
	unsigned long	tokenStart;

	if(cursor >= stringLength) return NO;

	// Absorb skipSet
	while(cursor < stringLength && [skipSet characterIsMember:[inString characterAtIndex:cursor]]) {
		cursor++;
	}
	tokenStart = cursor;

	// scanUpTo: stop at the first terminator or skipSet character
	while(cursor < stringLength) {
		unichar current = [inString characterAtIndex:cursor];
		if([inCharSet characterIsMember:current] || [skipSet characterIsMember:current]) break;
		cursor++;
	}

	*idx = cursor;

	if(cursor == tokenStart) return NO;

	if(outRangeRef) *outRangeRef = NSMakeRange(tokenStart, cursor - tokenStart);
	return YES;
}
David@0
   478
David@0
   479
// functional replacement for -[NSScanner scanCharactersFromSet:intoString:]
David@0
   480
/*!
 * @brief Scans forward across a run of characters that all belong to inCharSet.
 *
 * Leading skipSet characters are consumed first and are not part of the reported range.
 *
 * @param inString The string to scan.
 * @param inCharSet Characters that may be part of the run.
 * @param outRangeRef Optional. Receives the scanned range; written only when YES is returned.
 * @param idx In: where to start. Out: the index at which scanning stopped. Left untouched
 *            when it already lies at or beyond the end of inString.
 * @return YES when at least one character was scanned, NO otherwise.
 */
- (BOOL)_scanString:(NSString *)inString charactersFromSet:(NSCharacterSet *)inCharSet intoRange:(NSRange *)outRangeRef fromIndex:(unsigned long *)idx
{
	unsigned long	stringLength = [inString length];
	unsigned long	cursor = *idx;
	unsigned long	runStart;

	if(cursor >= stringLength) return NO;

	// Absorb skipSet
	while(cursor < stringLength && [skipSet characterIsMember:[inString characterAtIndex:cursor]]) {
		cursor++;
	}
	runStart = cursor;

	// scanCharacters: keep going while each character is a member of the set
	while(cursor < stringLength && [inCharSet characterIsMember:[inString characterAtIndex:cursor]]) {
		cursor++;
	}

	*idx = cursor;

	if(cursor == runStart) return NO;

	if(outRangeRef) *outRangeRef = NSMakeRange(runStart, cursor - runStart);
	return YES;
}
David@0
   511
@end