串(四):.NET Framework String类的实现(下)
2012-04-25 15:38:22
一、Substring方法的实现
如果理解了上一篇文章中介绍的FastAllocateString方法与wstrcpy方法,理解Substring方法的实现就会比较容易。此方法的两个重载都会先调用参数校验函数InternalSubStringWithChecks;如果没有抛出异常,就会继续调用内部实现的核心方法InternalSubString,该方法负责获取区间[startIndex, startIndex + length)内的子串,即从startIndex开始的length个字符。具体代码如下:
// Returns the part of this string that starts at startIndex and runs to the end.
// The remaining character count is computed here; the two-argument overload
// is responsible for validating the range.
public String Substring (int startIndex) {
    int remaining = this.Length - startIndex;
    return this.Substring(startIndex, remaining);
}
// Returns `length` characters of this string beginning at startIndex.
// Delegates to InternalSubStringWithChecks with fAlwaysCopy set to false.
public String Substring (int startIndex, int length) {
    const bool alwaysCopy = false;
    return this.InternalSubStringWithChecks(startIndex, length, alwaysCopy);
}
// Validates all arguments up front (the "clean operating room" strategy: nothing
// unchecked passes this point) and then forwards to InternalSubString.
// Throws ArgumentOutOfRangeException, in this order, when:
//   - startIndex is negative,
//   - startIndex is greater than the string length,
//   - length is negative,
//   - startIndex + length runs past the end of the string.
// A zero length yields String.Empty without calling InternalSubString.
internal String InternalSubStringWithChecks (int startIndex, int length, bool fAlwaysCopy) {
    int total = this.Length;

    if (startIndex < 0) {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_StartIndex"));
    }
    else if (total < startIndex) {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_StartIndexLargerThanLength"));
    }
    else if (length < 0) {
        throw new ArgumentOutOfRangeException("length", Environment.GetResourceString("ArgumentOutOfRange_NegativeLength"));
    }
    else if (total - length < startIndex) {
        // Written as a subtraction so that startIndex + length is never computed.
        throw new ArgumentOutOfRangeException("length", Environment.GetResourceString("ArgumentOutOfRange_IndexLength"));
    }

    return (length == 0)
        ? String.Empty
        : InternalSubString(startIndex, length, fAlwaysCopy);
}
// Core of Substring: produces the `length` characters that begin at startIndex.
// Arguments are only asserted here, not validated with exceptions.
// When the requested range is the entire string and fAlwaysCopy is false,
// this instance itself is returned instead of a copy.
unsafe string InternalSubString(int startIndex, int length, bool fAlwaysCopy) {
    BCLDebug.Assert( startIndex >= 0 && startIndex <= this.Length, "StartIndex is out of range!");
    BCLDebug.Assert( length >= 0 && startIndex <= this.Length - length, "length is out of range!");

    bool coversWholeString = (startIndex == 0) && (length == this.Length);
    if (coversWholeString && !fAlwaysCopy) {
        return this;
    }

    // Allocate the destination string, then copy the characters into it.
    String copy = FastAllocateString(length);
    fixed (char* source = &this.m_firstChar)
    fixed (char* target = &copy.m_firstChar) {
        wstrcpy(target, source + startIndex, length);
    }
    return copy;
}
二、Split方法的实现
String类中的Split方法提供了6种不同的重载。这里选取其中一个重载的实现来介绍Split方法,其他重载要么实现代码相似,要么间接调用此重载。
// Splits this string at every character found in `separator`, returning at most
// `count` pieces.
//   separator - characters to split on; null or empty is handled by MakeSeparatorList.
//   count     - maximum number of pieces; negative throws ArgumentOutOfRangeException.
//   options   - must be a value between None and RemoveEmptyEntries, otherwise
//               ArgumentException is thrown.
// Returns an empty array when count is 0, or when empty entries are removed and
// this string is empty. Returns a one-element array holding this string when no
// separator occurs or count is 1.
public String[] Split(char[] separator, int count, StringSplitOptions options) {
    if (count < 0) {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NegativeCount"));
    }

    bool knownOption = (options >= StringSplitOptions.None)
                    && (options <= StringSplitOptions.RemoveEmptyEntries);
    if (!knownOption) {
        throw new ArgumentException(Environment.GetResourceString("Arg_EnumIllegalVal", (int)options));
    }

    bool omitEmptyEntries = (options == StringSplitOptions.RemoveEmptyEntries);

    if (count == 0) {
        return new String[0];
    }
    if (omitEmptyEntries && this.Length == 0) {
        return new String[0];
    }

    // Collect the index of every separator occurrence; the list can never need
    // more slots than this string has characters.
    int[] sepList = new int[Length];
    int numReplaces = MakeSeparatorList(separator, ref sepList);

    if (numReplaces == 0 || count == 1) {
        return new String[] { this };
    }

    return omitEmptyEntries
        ? InternalSplitOmitEmptyEntries(sepList, null, numReplaces, count)
        : InternalSplitKeepEmptyEntries(sepList, null, numReplaces, count);
}
从上面方法可以看出,其核心代码中有3个辅助方法MakeSeparatorList、InternalSplitOmitEmptyEntries和InternalSplitKeepEmptyEntries。
MakeSeparatorList方法的功能是:创建原字符串经过separator分割后的分段索引数组sepList,实现也比较容易
// Fills sepList with the index of each character of this string that is a
// separator, in ascending order, and returns how many indices were written.
// A null or empty `separator` means "split on whitespace" (Char.IsWhiteSpace).
// Scanning stops early once sepList is full.
private unsafe int MakeSeparatorList(char[] separator, ref int[] sepList) {
    int hits = 0;
    bool splitOnWhitespace = (separator == null) || (separator.Length == 0);

    if (splitOnWhitespace) {
        fixed (char* text = &this.m_firstChar) {
            int pos = 0;
            while (pos < Length && hits < sepList.Length) {
                if (Char.IsWhiteSpace(text[pos])) {
                    sepList[hits] = pos;
                    hits++;
                }
                pos++;
            }
        }
        return hits;
    }

    int capacity = sepList.Length;
    int separatorTotal = separator.Length;
    // Explicit separators: test each character against every entry of `separator`.
    fixed (char* text = &this.m_firstChar, seps = separator) {
        for (int pos = 0; pos < Length && hits < capacity; pos++) {
            for (int k = 0; k < separatorTotal; k++) {
                if (seps[k] == text[pos]) {
                    sepList[hits++] = pos;
                    break;   // one match is enough for this position
                }
            }
        }
    }
    return hits;
}
InternalSplitOmitEmptyEntries方法和InternalSplitKeepEmptyEntries方法功能相似,前者在分割后省略空项,后者则保留空项。具体实现如下:
// Builds the split result while keeping empty entries.
//   sepList     - indices of the separators found in this string.
//   lengthList  - per-separator lengths, or null when every separator is one char.
//   numReplaces - number of valid entries in sepList.
//   count       - maximum number of pieces to return (asserted to be >= 2).
// The returned array has min(numReplaces, count - 1) + 1 slots.
private String[] InternalSplitKeepEmptyEntries(Int32 [] sepList, Int32 [] lengthList, Int32 numReplaces, int count) {
    BCLDebug.Assert( count >= 2, "Count>=2");

    int cursor = 0;   // index of the first character not yet emitted
    int filled = 0;   // next free slot in the result

    // `count` pieces need at most `count - 1` cuts.
    int maxCuts = count - 1;
    int cuts = (maxCuts > numReplaces) ? numReplaces : maxCuts;
    String[] pieces = new String[cuts + 1];

    int sep = 0;
    while (sep < cuts && cursor < Length) {
        // Emit everything between the cursor and the current separator.
        pieces[filled++] = Substring(cursor, sepList[sep] - cursor);
        int separatorWidth = (lengthList == null) ? 1 : lengthList[sep];
        cursor = sepList[sep] + separatorWidth;
        sep++;
    }

    if (cursor < Length && cuts >= 0) {
        // The tail after the last applied separator becomes the final piece.
        pieces[filled] = Substring(cursor);
    }
    else if (filled == cuts) {
        // The string ended exactly on a separator: the final piece is empty.
        pieces[filled] = String.Empty;
    }
    return pieces;
}
// Builds the split result while dropping empty entries.
//   sepList     - indices of the separators found in this string.
//   lengthList  - per-separator lengths, or null when every separator is one char.
//   numReplaces - number of valid entries in sepList.
//   count       - maximum number of pieces to return (asserted to be >= 2).
// A working array of the maximum possible size is filled first; if fewer pieces
// were produced, they are copied into an exactly-sized array (this costs one
// extra allocation and copy).
private String[] InternalSplitOmitEmptyEntries(Int32[] sepList, Int32[] lengthList, Int32 numReplaces, int count) {
    BCLDebug.Assert( count >= 2, "Count>=2");

    int capacity = (numReplaces >= count) ? count : numReplaces + 1;
    String[] pieces = new String[capacity];

    int cursor = 0;   // index of the first character not yet emitted
    int filled = 0;   // number of pieces stored so far

    for (int sep = 0; sep < numReplaces && cursor < Length; sep++) {
        if (sepList[sep] > cursor) {
            // Non-empty text lies between the cursor and this separator.
            pieces[filled++] = Substring(cursor, sepList[sep] - cursor);
        }
        cursor = sepList[sep] + ((lengthList == null) ? 1 : lengthList[sep]);

        if (filled != count - 1) {
            continue;
        }

        // Only one slot is left: step over any separators that immediately
        // follow the cursor, then stop scanning.
        int next = sep + 1;
        while (next < numReplaces && cursor == sepList[next]) {
            cursor += ((lengthList == null) ? 1 : lengthList[next]);
            next++;
        }
        break;
    }

    BCLDebug.Assert( filled < capacity, "arrIndex < maxItems");

    // Whatever remains after the last processed separator is the final piece.
    if (cursor < Length) {
        pieces[filled] = Substring(cursor);
        filled++;
    }

    if (filled == capacity) {
        return pieces;
    }

    // The working array was not filled completely: shrink to the exact size.
    String[] exact = new String[filled];
    for (int k = 0; k < filled; k++) {
        exact[k] = pieces[k];
    }
    return exact;
}
三、Join方法的实现
Join方法与Split方法功能相反,它通过连接符将数组中的各个字符串拼接成一个字符串。我们来看它的具体实现:
// Concatenates every string in `value`, placing `separator` between neighbours.
// Throws ArgumentNullException when `value` is null.
public static String Join (String separator, String[] value) {
    if (value != null) {
        return Join(separator, value, 0, value.Length);
    }
    throw new ArgumentNullException("value");
}
// Concatenates `count` elements of `value`, starting at startIndex, placing
// `separator` between neighbours.
//   separator  - null is treated as String.Empty.
//   value      - must not be null (ArgumentNullException); null elements add no length.
//   startIndex - must be >= 0 (ArgumentOutOfRangeException).
//   count      - must be >= 0 and startIndex + count must not exceed value.Length
//                (ArgumentOutOfRangeException).
// Returns String.Empty when count is 0 or the computed result length is 0.
// Throws OutOfMemoryException when the computed length overflows.
public unsafe static String Join(String separator, String[] value, int startIndex, int count) {
    if (separator == null) {
        separator = String.Empty;
    }
    if (value == null) {
        throw new ArgumentNullException("value");
    }
    if (startIndex < 0) {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_StartIndex"));
    }
    if (count < 0) {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_NegativeCount"));
    }
    if (value.Length - count < startIndex) {
        throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    if (count == 0) {
        return String.Empty;
    }

    // One past the last element to join; cannot overflow because
    // startIndex + count <= value.Length was verified above.
    int limit = startIndex + count;

    // First pass: total length of the result.
    int totalLength = 0;
    for (int i = startIndex; i < limit; i++) {
        String item = value[i];
        if (item != null) {
            totalLength += item.Length;
        }
    }
    // Room for the separators placed between elements.
    totalLength += (count - 1) * separator.Length;

    if ((totalLength < 0) || ((totalLength + 1) < 0) ) {
        throw new OutOfMemoryException();
    }
    if (totalLength == 0) {
        return String.Empty;
    }

    // Second pass: allocate the result once and append straight into its buffer.
    string joined = FastAllocateString( totalLength );
    fixed (char * buffer = &joined.m_firstChar) {
        // UnSafeCharBuffer writes into the supplied memory, starting at its beginning.
        UnSafeCharBuffer writer = new UnSafeCharBuffer( buffer, totalLength);
        for (int i = startIndex; i < limit; i++) {
            if (i != startIndex) {
                writer.AppendString( separator );
            }
            writer.AppendString( value[i] );
        }
        BCLDebug.Assert(*(buffer + writer.Length) == '\0', "String must be null-terminated!");
    }
    return joined;
}
四、Compare方法的实现
Compare方法的比较分为语言性的与非语言性的两种。如果是语言性的比较,会根据当前系统文化区域的设置进行比较;如果是非语言性的比较,则进行序数比较。该方法重载较多,本文也只挑其中一个重载来介绍它的实现:
// Compares strA and strB according to comparisonType.
//   - Culture-sensitive modes (CurrentCulture*, InvariantCulture*) delegate to the
//     CompareInfo of the corresponding CultureInfo.
//   - Ordinal uses CompareOrdinalHelper.
//   - OrdinalIgnoreCase uses nativeCompareOrdinal when both strings are ASCII and
//     TextInfo.CompareOrdinalIgnoreCase otherwise.
// Returns 0 when both arguments are the same reference (including both null),
// -1 when only strA is null, 1 when only strB is null; otherwise the result of
// the selected comparison.
// Throws ArgumentException when comparisonType is outside the range
// CurrentCulture..OrdinalIgnoreCase.
public static int Compare(String strA, String strB, StringComparison comparisonType) {
    if( comparisonType < StringComparison.CurrentCulture || comparisonType > StringComparison.OrdinalIgnoreCase) {
        throw new ArgumentException(Environment.GetResourceString("NotSupported_StringComparison"), "comparisonType");
    }
    // Reference equality short-circuit; also covers the case where both are null.
    if ((Object)strA == (Object)strB) {
        return 0;
    }
    // A null string sorts before any non-null string.
    if( strA == null) {
        return -1;
    }
    if( strB == null) {
        return 1;
    }
    // Dispatch on the requested comparison mode.
    switch (comparisonType) {
        case StringComparison.CurrentCulture:
            return CultureInfo.CurrentCulture.CompareInfo.Compare(strA, strB, CompareOptions.None);
        case StringComparison.CurrentCultureIgnoreCase:
            return CultureInfo.CurrentCulture.CompareInfo.Compare(strA, strB, CompareOptions.IgnoreCase);
        case StringComparison.InvariantCulture:
            return CultureInfo.InvariantCulture.CompareInfo.Compare(strA, strB, CompareOptions.None);
        case StringComparison.InvariantCultureIgnoreCase:
            // Fixed: the receiver was the undefined identifier `c`; it must be CultureInfo,
            // matching the InvariantCulture case above.
            return CultureInfo.InvariantCulture.CompareInfo.Compare(strA, strB, CompareOptions.IgnoreCase);
        case StringComparison.Ordinal:
            return CompareOrdinalHelper(strA, strB);
        case StringComparison.OrdinalIgnoreCase:
            // ASCII-only input goes to nativeCompareOrdinal; the third argument `true`
            // is presumably the ignore-case flag (defined outside this excerpt).
            if (strA.IsAscii() && strB.IsAscii()) {
                return (String.nativeCompareOrdinal(strA, strB, true));
            }
            return TextInfo.CompareOrdinalIgnoreCase(strA, strB);
        default:
            // The range check at the top should already have rejected such values.
            throw new NotSupportedException(Environment.GetResourceString("NotSupported_StringComparison"));
    }
}
从代码中我们可以得知,语言性的比较会调用CultureInfo类和CompareInfo类,具体的实现这里就不介绍了,可以参见对应的cs文件。这里重点看一下序数比较方法CompareOrdinalHelper。为了提高效率,该方法以长度10为分界,分成两种不同的方式来进行比较,具体代码如下:
// Ordinal comparison of strA and strB (both asserted non-null).
// Only the first min(strA.Length, strB.Length) characters are compared; if those
// are all equal, the result is the difference of the two lengths.
// For speed, while at least 10 characters remain they are compared in a fast,
// unrolled loop; if that loop finds a difference, its position is computed and
// the result is returned immediately. Otherwise the slower loop below handles
// the remaining characters.
// Returns a negative value, zero, or a positive value.
private unsafe static int CompareOrdinalHelper(String strA, String strB)
{
BCLDebug.Assert(strA != null && strB != null, "strings cannot be null!");
// Number of characters both strings have in common by position.
int length = Math.Min(strA.Length, strB.Length);
// Char offset (within the current 10-char window) of the first differing pair, or -1 if none found.
int diffOffset = -1;
fixed(char* ap = strA) fixed(char* bp = strB)
{
char* a = ap;
char* b = bp;
// Fast path: advance 10 characters per iteration.
// Each *(int*) read covers 32 bits, i.e. two 16-bit chars at once.
while (length >= 10)
{
if (*(int*)a != *(int*)b) {
diffOffset = 0;
break;
}
if (*(int*)(a+2) != *(int*)(b+2)) {
diffOffset = 2;
break;
}
if (*(int*)(a+4) != *(int*)(b+4)) {
diffOffset = 4;
break;
}
if (*(int*)(a+6) != *(int*)(b+6)) {
diffOffset = 6;
break;
}
if (*(int*)(a+8) != *(int*)(b+8)) {
diffOffset = 8;
break;
}
a += 10;
b += 10;
length -= 10;
}
if( diffOffset != -1) {
// Move both pointers to the differing pair found by the fast path.
a += diffOffset;
b += diffOffset;
int order;
// The first char of the pair differs: its difference is the result.
if ( (order = (int)*a - (int)*b) != 0) {
return order;
}
// The first chars are equal, so the second char of the pair must differ.
BCLDebug.Assert( *(a+1) != *(b+1), "This byte must be different if we reach here!");
return ((int)*(a+1) - (int)*(b+1));
}
// Fewer than 10 characters remain: slower loop, two chars (one 32-bit read) per step.
// NOTE(review): when an odd number of chars remains, the last read also covers the
// char just past `length`; presumably this relies on strings being null-terminated — confirm.
while (length > 0) {
if (*(int*)a != *(int*)b) {
break;
}
a += 2;
b += 2;
length -= 2;
}
// length > 0 here means the loop above stopped on a mismatching pair.
if( length > 0) {
int c;
if ( (c = (int)*a - (int)*b) != 0) {
// The first char of the pair differs.
return c;
}
// The first chars are equal, so the second char of the pair must differ.
BCLDebug.Assert( *(a+1) != *(b+1), "This byte must be different if we reach here!");
return ((int)*(a+1) - (int)*(b+1));
}
// All compared characters are equal: return the difference of the two lengths.
return strA.Length - strB.Length;
}
}
五、Trim方法的实现
该方法删除原字符串首部和尾部出现的、由trimChars指定的字符,内部提供一个辅助方法TrimHelper来实现核心逻辑:
// Characters removed by the Trim family when the caller supplies no trim characters.
internal static readonly char[] WhitespaceChars = new char[] {
    '\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u0020', '\u0085',
    '\u00A0', '\u1680',
    '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005',
    '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u200B',
    '\u2028', '\u2029',
    '\u3000', '\uFEFF'
};
// Removes, from both ends of this string, every leading and trailing character
// that appears in trimChars. A null or empty trimChars means WhitespaceChars.
public String Trim(params char[] trimChars) {
    bool useDefaults = (trimChars == null) || (trimChars.Length == 0);
    char[] charsToTrim = useDefaults ? WhitespaceChars : trimChars;
    return TrimHelper(charsToTrim, TrimBoth);
}
// Removes, from the start of this string, every leading character that appears
// in trimChars. A null or empty trimChars means WhitespaceChars.
public String TrimStart(params char[] trimChars) {
    bool useDefaults = (trimChars == null) || (trimChars.Length == 0);
    char[] charsToTrim = useDefaults ? WhitespaceChars : trimChars;
    return TrimHelper(charsToTrim, TrimHead);
}
// Removes, from the end of this string, every trailing character that appears
// in trimChars. A null or empty trimChars means WhitespaceChars.
public String TrimEnd(params char[] trimChars) {
    bool useDefaults = (trimChars == null) || (trimChars.Length == 0);
    char[] charsToTrim = useDefaults ? WhitespaceChars : trimChars;
    return TrimHelper(charsToTrim, TrimTail);
}
// Removes the characters listed in WhitespaceChars from both ends of this string.
public String Trim() {
    String trimmed = this.TrimHelper(WhitespaceChars, TrimBoth);
    return trimmed;
}
// Shared implementation of Trim / TrimStart / TrimEnd.
//   trimChars - the characters to strip.
//   trimType  - TrimHead, TrimTail or TrimBoth; selects which ends are scanned.
// Returns this instance when nothing is removed, String.Empty when everything is
// removed, and otherwise the retained range via InternalSubString.
private String TrimHelper(char[] trimChars, int trimType) {
    int first = 0;                 // index of the first character to keep
    int last = this.Length - 1;    // index of the last character to keep

    if (trimType != TrimTail) {
        // Walk forward past every leading character that is in trimChars.
        while (first < this.Length) {
            char candidate = this[first];
            bool isTrimChar = false;
            for (int k = 0; k < trimChars.Length && !isTrimChar; k++) {
                isTrimChar = (trimChars[k] == candidate);
            }
            if (!isTrimChar) {
                break;
            }
            first++;
        }
    }

    if (trimType != TrimHead) {
        // Walk backward past every trailing character that is in trimChars,
        // never moving in front of `first`.
        while (last >= first) {
            char candidate = this[last];
            bool isTrimChar = false;
            for (int k = 0; k < trimChars.Length && !isTrimChar; k++) {
                isTrimChar = (trimChars[k] == candidate);
            }
            if (!isTrimChar) {
                break;
            }
            last--;
        }
    }

    int kept = last - first + 1;
    if (kept == this.Length) {
        // Nothing was trimmed: hand back the same instance.
        return this;
    }
    if (kept == 0) {
        return String.Empty;
    }
    // Return the characters from first through last.
    return InternalSubString(first, kept, false);
}
String类的实现就谈到这里了,还有一些方法这次没有谈到,比如IndexOf、Format、PadLeft、PadRight。像IndexOf方法,String类会调用CompareInfo类的IndexOf方法,具体请参见CompareInfo类;Format方法则是通过StringBuilder类的AppendFormat方法来实现的。其他方法就不一一介绍了,像PadLeft、PadRight这些未讲到的方法基本都是通过非托管C++代码来实现的。