diff options
author | chriseth <chris@ethereum.org> | 2017-07-05 18:28:15 +0800 |
---|---|---|
committer | chriseth <chris@ethereum.org> | 2017-07-05 18:39:55 +0800 |
commit | ac84b36144f746662e5ddb984d283e053c7d06ba (patch) | |
tree | f50ee438a384e60574a4c28ea32d2b6ae2315795 /test/compilationTests/stringutils/strings.sol | |
parent | 05a26fc98c1201057c618c536ca0537e456c9b15 (diff) | |
download | dexon-solidity-ac84b36144f746662e5ddb984d283e053c7d06ba.tar.gz dexon-solidity-ac84b36144f746662e5ddb984d283e053c7d06ba.tar.zst dexon-solidity-ac84b36144f746662e5ddb984d283e053c7d06ba.zip |
Added various contracts for testing.
Diffstat (limited to 'test/compilationTests/stringutils/strings.sol')
-rw-r--r-- | test/compilationTests/stringutils/strings.sol | 709 |
1 files changed, 709 insertions, 0 deletions
diff --git a/test/compilationTests/stringutils/strings.sol b/test/compilationTests/stringutils/strings.sol new file mode 100644 index 00000000..0a2d68bd --- /dev/null +++ b/test/compilationTests/stringutils/strings.sol @@ -0,0 +1,709 @@ +/* + * @title String & slice utility library for Solidity contracts. + * @author Nick Johnson <arachnid@notdot.net> + * + * @dev Functionality in this library is largely implemented using an + * abstraction called a 'slice'. A slice represents a part of a string - + * anything from the entire string to a single character, or even no + * characters at all (a 0-length slice). Since a slice only has to specify + * an offset and a length, copying and manipulating slices is a lot less + * expensive than copying and manipulating the strings they reference. + * + * To further reduce gas costs, most functions on slice that need to return + * a slice modify the original one instead of allocating a new one; for + * instance, `s.split(".")` will return the text up to the first '.', + * modifying s to only contain the remainder of the string after the '.'. + * In situations where you do not want to modify the original slice, you + * can make a copy first with `.copy()`, for example: + * `s.copy().split(".")`. Try and avoid using this idiom in loops; since + * Solidity has no memory management, it will result in allocating many + * short-lived slices that are later discarded. + * + * Functions that return two slices come in two versions: a non-allocating + * version that takes the second slice as an argument, modifying it in + * place, and an allocating version that allocates and returns the second + * slice; see `nextRune` for example. + * + * Functions that have to copy string data will return strings rather than + * slices; these can be cast back to slices for further processing if + * required. + * + * For convenience, some functions are provided with non-modifying + * variants that create a new slice and return both; for instance, + * `s.splitNew('.')` leaves s unmodified, and returns two values + * corresponding to the left and right parts of the string. + */ +library strings { + struct slice { + uint _len; + uint _ptr; + } + + function memcpy(uint dest, uint src, uint len) private { + // Copy word-length chunks while possible + for(; len >= 32; len -= 32) { + assembly { + mstore(dest, mload(src)) + } + dest += 32; + src += 32; + } + + // Copy remaining bytes + uint mask = 256 ** (32 - len) - 1; + assembly { + let srcpart := and(mload(src), not(mask)) + let destpart := and(mload(dest), mask) + mstore(dest, or(destpart, srcpart)) + } + } + + /* + * @dev Returns a slice containing the entire string. + * @param self The string to make a slice from. + * @return A newly allocated slice containing the entire string. + */ + function toSlice(string self) internal returns (slice) { + uint ptr; + assembly { + ptr := add(self, 0x20) + } + return slice(bytes(self).length, ptr); + } + + /* + * @dev Returns the length of a null-terminated bytes32 string. + * @param self The value to find the length of. + * @return The length of the string, from 0 to 32. + */ + function len(bytes32 self) internal returns (uint) { + uint ret; + if (self == 0) + return 0; + if (self & 0xffffffffffffffffffffffffffffffff == 0) { + ret += 16; + self = bytes32(uint(self) / 0x100000000000000000000000000000000); + } + if (self & 0xffffffffffffffff == 0) { + ret += 8; + self = bytes32(uint(self) / 0x10000000000000000); + } + if (self & 0xffffffff == 0) { + ret += 4; + self = bytes32(uint(self) / 0x100000000); + } + if (self & 0xffff == 0) { + ret += 2; + self = bytes32(uint(self) / 0x10000); + } + if (self & 0xff == 0) { + ret += 1; + } + return 32 - ret; + } + + /* + * @dev Returns a slice containing the entire bytes32, interpreted as a + * null-termintaed utf-8 string. + * @param self The bytes32 value to convert to a slice. + * @return A new slice containing the value of the input argument up to the + * first null. + */ + function toSliceB32(bytes32 self) internal returns (slice ret) { + // Allocate space for `self` in memory, copy it there, and point ret at it + assembly { + let ptr := mload(0x40) + mstore(0x40, add(ptr, 0x20)) + mstore(ptr, self) + mstore(add(ret, 0x20), ptr) + } + ret._len = len(self); + } + + /* + * @dev Returns a new slice containing the same data as the current slice. + * @param self The slice to copy. + * @return A new slice containing the same data as `self`. + */ + function copy(slice self) internal returns (slice) { + return slice(self._len, self._ptr); + } + + /* + * @dev Copies a slice to a new string. + * @param self The slice to copy. + * @return A newly allocated string containing the slice's text. + */ + function toString(slice self) internal returns (string) { + var ret = new string(self._len); + uint retptr; + assembly { retptr := add(ret, 32) } + + memcpy(retptr, self._ptr, self._len); + return ret; + } + + /* + * @dev Returns the length in runes of the slice. Note that this operation + * takes time proportional to the length of the slice; avoid using it + * in loops, and call `slice.empty()` if you only need to know whether + * the slice is empty or not. + * @param self The slice to operate on. + * @return The length of the slice in runes. + */ + function len(slice self) internal returns (uint) { + // Starting at ptr-31 means the LSB will be the byte we care about + var ptr = self._ptr - 31; + var end = ptr + self._len; + for (uint len = 0; ptr < end; len++) { + uint8 b; + assembly { b := and(mload(ptr), 0xFF) } + if (b < 0x80) { + ptr += 1; + } else if(b < 0xE0) { + ptr += 2; + } else if(b < 0xF0) { + ptr += 3; + } else if(b < 0xF8) { + ptr += 4; + } else if(b < 0xFC) { + ptr += 5; + } else { + ptr += 6; + } + } + return len; + } + + /* + * @dev Returns true if the slice is empty (has a length of 0). + * @param self The slice to operate on. + * @return True if the slice is empty, False otherwise. + */ + function empty(slice self) internal returns (bool) { + return self._len == 0; + } + + /* + * @dev Returns a positive number if `other` comes lexicographically after + * `self`, a negative number if it comes before, or zero if the + * contents of the two slices are equal. Comparison is done per-rune, + * on unicode codepoints. + * @param self The first slice to compare. + * @param other The second slice to compare. + * @return The result of the comparison. + */ + function compare(slice self, slice other) internal returns (int) { + uint shortest = self._len; + if (other._len < self._len) + shortest = other._len; + + var selfptr = self._ptr; + var otherptr = other._ptr; + for (uint idx = 0; idx < shortest; idx += 32) { + uint a; + uint b; + assembly { + a := mload(selfptr) + b := mload(otherptr) + } + if (a != b) { + // Mask out irrelevant bytes and check again + uint mask = ~(2 ** (8 * (32 - shortest + idx)) - 1); + var diff = (a & mask) - (b & mask); + if (diff != 0) + return int(diff); + } + selfptr += 32; + otherptr += 32; + } + return int(self._len) - int(other._len); + } + + /* + * @dev Returns true if the two slices contain the same text. + * @param self The first slice to compare. + * @param self The second slice to compare. + * @return True if the slices are equal, false otherwise. + */ + function equals(slice self, slice other) internal returns (bool) { + return compare(self, other) == 0; + } + + /* + * @dev Extracts the first rune in the slice into `rune`, advancing the + * slice to point to the next rune and returning `self`. + * @param self The slice to operate on. + * @param rune The slice that will contain the first rune. + * @return `rune`. + */ + function nextRune(slice self, slice rune) internal returns (slice) { + rune._ptr = self._ptr; + + if (self._len == 0) { + rune._len = 0; + return rune; + } + + uint len; + uint b; + // Load the first byte of the rune into the LSBs of b + assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) } + if (b < 0x80) { + len = 1; + } else if(b < 0xE0) { + len = 2; + } else if(b < 0xF0) { + len = 3; + } else { + len = 4; + } + + // Check for truncated codepoints + if (len > self._len) { + rune._len = self._len; + self._ptr += self._len; + self._len = 0; + return rune; + } + + self._ptr += len; + self._len -= len; + rune._len = len; + return rune; + } + + /* + * @dev Returns the first rune in the slice, advancing the slice to point + * to the next rune. + * @param self The slice to operate on. + * @return A slice containing only the first rune from `self`. + */ + function nextRune(slice self) internal returns (slice ret) { + nextRune(self, ret); + } + + /* + * @dev Returns the number of the first codepoint in the slice. + * @param self The slice to operate on. + * @return The number of the first codepoint in the slice. + */ + function ord(slice self) internal returns (uint ret) { + if (self._len == 0) { + return 0; + } + + uint word; + uint len; + uint div = 2 ** 248; + + // Load the rune into the MSBs of b + assembly { word:= mload(mload(add(self, 32))) } + var b = word / div; + if (b < 0x80) { + ret = b; + len = 1; + } else if(b < 0xE0) { + ret = b & 0x1F; + len = 2; + } else if(b < 0xF0) { + ret = b & 0x0F; + len = 3; + } else { + ret = b & 0x07; + len = 4; + } + + // Check for truncated codepoints + if (len > self._len) { + return 0; + } + + for (uint i = 1; i < len; i++) { + div = div / 256; + b = (word / div) & 0xFF; + if (b & 0xC0 != 0x80) { + // Invalid UTF-8 sequence + return 0; + } + ret = (ret * 64) | (b & 0x3F); + } + + return ret; + } + + /* + * @dev Returns the keccak-256 hash of the slice. + * @param self The slice to hash. + * @return The hash of the slice. + */ + function keccak(slice self) internal returns (bytes32 ret) { + assembly { + ret := sha3(mload(add(self, 32)), mload(self)) + } + } + + /* + * @dev Returns true if `self` starts with `needle`. + * @param self The slice to operate on. + * @param needle The slice to search for. + * @return True if the slice starts with the provided text, false otherwise. + */ + function startsWith(slice self, slice needle) internal returns (bool) { + if (self._len < needle._len) { + return false; + } + + if (self._ptr == needle._ptr) { + return true; + } + + bool equal; + assembly { + let len := mload(needle) + let selfptr := mload(add(self, 0x20)) + let needleptr := mload(add(needle, 0x20)) + equal := eq(sha3(selfptr, len), sha3(needleptr, len)) + } + return equal; + } + + /* + * @dev If `self` starts with `needle`, `needle` is removed from the + * beginning of `self`. Otherwise, `self` is unmodified. + * @param self The slice to operate on. + * @param needle The slice to search for. + * @return `self` + */ + function beyond(slice self, slice needle) internal returns (slice) { + if (self._len < needle._len) { + return self; + } + + bool equal = true; + if (self._ptr != needle._ptr) { + assembly { + let len := mload(needle) + let selfptr := mload(add(self, 0x20)) + let needleptr := mload(add(needle, 0x20)) + equal := eq(sha3(selfptr, len), sha3(needleptr, len)) + } + } + + if (equal) { + self._len -= needle._len; + self._ptr += needle._len; + } + + return self; + } + + /* + * @dev Returns true if the slice ends with `needle`. + * @param self The slice to operate on. + * @param needle The slice to search for. + * @return True if the slice starts with the provided text, false otherwise. + */ + function endsWith(slice self, slice needle) internal returns (bool) { + if (self._len < needle._len) { + return false; + } + + var selfptr = self._ptr + self._len - needle._len; + + if (selfptr == needle._ptr) { + return true; + } + + bool equal; + assembly { + let len := mload(needle) + let needleptr := mload(add(needle, 0x20)) + equal := eq(sha3(selfptr, len), sha3(needleptr, len)) + } + + return equal; + } + + /* + * @dev If `self` ends with `needle`, `needle` is removed from the + * end of `self`. Otherwise, `self` is unmodified. + * @param self The slice to operate on. + * @param needle The slice to search for. + * @return `self` + */ + function until(slice self, slice needle) internal returns (slice) { + if (self._len < needle._len) { + return self; + } + + var selfptr = self._ptr + self._len - needle._len; + bool equal = true; + if (selfptr != needle._ptr) { + assembly { + let len := mload(needle) + let needleptr := mload(add(needle, 0x20)) + equal := eq(sha3(selfptr, len), sha3(needleptr, len)) + } + } + + if (equal) { + self._len -= needle._len; + } + + return self; + } + + // Returns the memory address of the first byte of the first occurrence of + // `needle` in `self`, or the first byte after `self` if not found. + function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { + uint ptr; + uint idx; + + if (needlelen <= selflen) { + if (needlelen <= 32) { + // Optimized assembly for 68 gas per byte on short strings + assembly { + let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) + let needledata := and(mload(needleptr), mask) + let end := add(selfptr, sub(selflen, needlelen)) + ptr := selfptr + loop: + jumpi(exit, eq(and(mload(ptr), mask), needledata)) + ptr := add(ptr, 1) + jumpi(loop, lt(sub(ptr, 1), end)) + ptr := add(selfptr, selflen) + exit: + } + return ptr; + } else { + // For long needles, use hashing + bytes32 hash; + assembly { hash := sha3(needleptr, needlelen) } + ptr = selfptr; + for (idx = 0; idx <= selflen - needlelen; idx++) { + bytes32 testHash; + assembly { testHash := sha3(ptr, needlelen) } + if (hash == testHash) + return ptr; + ptr += 1; + } + } + } + return selfptr + selflen; + } + + // Returns the memory address of the first byte after the last occurrence of + // `needle` in `self`, or the address of `self` if not found. + function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { + uint ptr; + + if (needlelen <= selflen) { + if (needlelen <= 32) { + // Optimized assembly for 69 gas per byte on short strings + assembly { + let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) + let needledata := and(mload(needleptr), mask) + ptr := add(selfptr, sub(selflen, needlelen)) + loop: + jumpi(ret, eq(and(mload(ptr), mask), needledata)) + ptr := sub(ptr, 1) + jumpi(loop, gt(add(ptr, 1), selfptr)) + ptr := selfptr + jump(exit) + ret: + ptr := add(ptr, needlelen) + exit: + } + return ptr; + } else { + // For long needles, use hashing + bytes32 hash; + assembly { hash := sha3(needleptr, needlelen) } + ptr = selfptr + (selflen - needlelen); + while (ptr >= selfptr) { + bytes32 testHash; + assembly { testHash := sha3(ptr, needlelen) } + if (hash == testHash) + return ptr + needlelen; + ptr -= 1; + } + } + } + return selfptr; + } + + /* + * @dev Modifies `self` to contain everything from the first occurrence of + * `needle` to the end of the slice. `self` is set to the empty slice + * if `needle` is not found. + * @param self The slice to search and modify. + * @param needle The text to search for. + * @return `self`. + */ + function find(slice self, slice needle) internal returns (slice) { + uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); + self._len -= ptr - self._ptr; + self._ptr = ptr; + return self; + } + + /* + * @dev Modifies `self` to contain the part of the string from the start of + * `self` to the end of the first occurrence of `needle`. If `needle` + * is not found, `self` is set to the empty slice. + * @param self The slice to search and modify. + * @param needle The text to search for. + * @return `self`. + */ + function rfind(slice self, slice needle) internal returns (slice) { + uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); + self._len = ptr - self._ptr; + return self; + } + + /* + * @dev Splits the slice, setting `self` to everything after the first + * occurrence of `needle`, and `token` to everything before it. If + * `needle` does not occur in `self`, `self` is set to the empty slice, + * and `token` is set to the entirety of `self`. + * @param self The slice to split. + * @param needle The text to search for in `self`. + * @param token An output parameter to which the first token is written. + * @return `token`. + */ + function split(slice self, slice needle, slice token) internal returns (slice) { + uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); + token._ptr = self._ptr; + token._len = ptr - self._ptr; + if (ptr == self._ptr + self._len) { + // Not found + self._len = 0; + } else { + self._len -= token._len + needle._len; + self._ptr = ptr + needle._len; + } + return token; + } + + /* + * @dev Splits the slice, setting `self` to everything after the first + * occurrence of `needle`, and returning everything before it. If + * `needle` does not occur in `self`, `self` is set to the empty slice, + * and the entirety of `self` is returned. + * @param self The slice to split. + * @param needle The text to search for in `self`. + * @return The part of `self` up to the first occurrence of `delim`. + */ + function split(slice self, slice needle) internal returns (slice token) { + split(self, needle, token); + } + + /* + * @dev Splits the slice, setting `self` to everything before the last + * occurrence of `needle`, and `token` to everything after it. If + * `needle` does not occur in `self`, `self` is set to the empty slice, + * and `token` is set to the entirety of `self`. + * @param self The slice to split. + * @param needle The text to search for in `self`. + * @param token An output parameter to which the first token is written. + * @return `token`. + */ + function rsplit(slice self, slice needle, slice token) internal returns (slice) { + uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); + token._ptr = ptr; + token._len = self._len - (ptr - self._ptr); + if (ptr == self._ptr) { + // Not found + self._len = 0; + } else { + self._len -= token._len + needle._len; + } + return token; + } + + /* + * @dev Splits the slice, setting `self` to everything before the last + * occurrence of `needle`, and returning everything after it. If + * `needle` does not occur in `self`, `self` is set to the empty slice, + * and the entirety of `self` is returned. + * @param self The slice to split. + * @param needle The text to search for in `self`. + * @return The part of `self` after the last occurrence of `delim`. + */ + function rsplit(slice self, slice needle) internal returns (slice token) { + rsplit(self, needle, token); + } + + /* + * @dev Counts the number of nonoverlapping occurrences of `needle` in `self`. + * @param self The slice to search. + * @param needle The text to search for in `self`. + * @return The number of occurrences of `needle` found in `self`. + */ + function count(slice self, slice needle) internal returns (uint count) { + uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len; + while (ptr <= self._ptr + self._len) { + count++; + ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len; + } + } + + /* + * @dev Returns True if `self` contains `needle`. + * @param self The slice to search. + * @param needle The text to search for in `self`. + * @return True if `needle` is found in `self`, false otherwise. + */ + function contains(slice self, slice needle) internal returns (bool) { + return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr; + } + + /* + * @dev Returns a newly allocated string containing the concatenation of + * `self` and `other`. + * @param self The first slice to concatenate. + * @param other The second slice to concatenate. + * @return The concatenation of the two strings. + */ + function concat(slice self, slice other) internal returns (string) { + var ret = new string(self._len + other._len); + uint retptr; + assembly { retptr := add(ret, 32) } + memcpy(retptr, self._ptr, self._len); + memcpy(retptr + self._len, other._ptr, other._len); + return ret; + } + + /* + * @dev Joins an array of slices, using `self` as a delimiter, returning a + * newly allocated string. + * @param self The delimiter to use. + * @param parts A list of slices to join. + * @return A newly allocated string containing all the slices in `parts`, + * joined with `self`. + */ + function join(slice self, slice[] parts) internal returns (string) { + if (parts.length == 0) + return ""; + + uint len = self._len * (parts.length - 1); + for(uint i = 0; i < parts.length; i++) + len += parts[i]._len; + + var ret = new string(len); + uint retptr; + assembly { retptr := add(ret, 32) } + + for(i = 0; i < parts.length; i++) { + memcpy(retptr, parts[i]._ptr, parts[i]._len); + retptr += parts[i]._len; + if (i < parts.length - 1) { + memcpy(retptr, self._ptr, self._len); + retptr += self._len; + } + } + + return ret; + } +} |