/* * @title String & slice utility library for Solidity contracts. * @author Nick Johnson * * @dev Functionality in this library is largely implemented using an * abstraction called a 'slice'. A slice represents a part of a string - * anything from the entire string to a single character, or even no * characters at all (a 0-length slice). Since a slice only has to specify * an offset and a length, copying and manipulating slices is a lot less * expensive than copying and manipulating the strings they reference. * * To further reduce gas costs, most functions on slice that need to return * a slice modify the original one instead of allocating a new one; for * instance, `s.split(".")` will return the text up to the first '.', * modifying s to only contain the remainder of the string after the '.'. * In situations where you do not want to modify the original slice, you * can make a copy first with `.copy()`, for example: * `s.copy().split(".")`. Try and avoid using this idiom in loops; since * Solidity has no memory management, it will result in allocating many * short-lived slices that are later discarded. * * Functions that return two slices come in two versions: a non-allocating * version that takes the second slice as an argument, modifying it in * place, and an allocating version that allocates and returns the second * slice; see `nextRune` for example. * * Functions that have to copy string data will return strings rather than * slices; these can be cast back to slices for further processing if * required. * * For convenience, some functions are provided with non-modifying * variants that create a new slice and return both; for instance, * `s.splitNew('.')` leaves s unmodified, and returns two values * corresponding to the left and right parts of the string. */ library strings { struct slice { uint _len; uint _ptr; } function memcpy(uint dest, uint src, uint len) private { // Copy word-length chunks while possible for(; len >= 32; len -= 32) { assembly { mstore(dest, mload(src)) } dest += 32; src += 32; } // Copy remaining bytes uint mask = 256 ** (32 - len) - 1; assembly { let srcpart := and(mload(src), not(mask)) let destpart := and(mload(dest), mask) mstore(dest, or(destpart, srcpart)) } } /* * @dev Returns a slice containing the entire string. * @param self The string to make a slice from. * @return A newly allocated slice containing the entire string. */ function toSlice(string self) internal returns (slice) { uint ptr; assembly { ptr := add(self, 0x20) } return slice(bytes(self).length, ptr); } /* * @dev Returns the length of a null-terminated bytes32 string. * @param self The value to find the length of. * @return The length of the string, from 0 to 32. */ function len(bytes32 self) internal returns (uint) { uint ret; if (self == 0) return 0; if (self & 0xffffffffffffffffffffffffffffffff == 0) { ret += 16; self = bytes32(uint(self) / 0x100000000000000000000000000000000); } if (self & 0xffffffffffffffff == 0) { ret += 8; self = bytes32(uint(self) / 0x10000000000000000); } if (self & 0xffffffff == 0) { ret += 4; self = bytes32(uint(self) / 0x100000000); } if (self & 0xffff == 0) { ret += 2; self = bytes32(uint(self) / 0x10000); } if (self & 0xff == 0) { ret += 1; } return 32 - ret; } /* * @dev Returns a slice containing the entire bytes32, interpreted as a * null-termintaed utf-8 string. * @param self The bytes32 value to convert to a slice. * @return A new slice containing the value of the input argument up to the * first null. */ function toSliceB32(bytes32 self) internal returns (slice ret) { // Allocate space for `self` in memory, copy it there, and point ret at it assembly { let ptr := mload(0x40) mstore(0x40, add(ptr, 0x20)) mstore(ptr, self) mstore(add(ret, 0x20), ptr) } ret._len = len(self); } /* * @dev Returns a new slice containing the same data as the current slice. * @param self The slice to copy. * @return A new slice containing the same data as `self`. */ function copy(slice self) internal returns (slice) { return slice(self._len, self._ptr); } /* * @dev Copies a slice to a new string. * @param self The slice to copy. * @return A newly allocated string containing the slice's text. */ function toString(slice self) internal returns (string) { var ret = new string(self._len); uint retptr; assembly { retptr := add(ret, 32) } memcpy(retptr, self._ptr, self._len); return ret; } /* * @dev Returns the length in runes of the slice. Note that this operation * takes time proportional to the length of the slice; avoid using it * in loops, and call `slice.empty()` if you only need to know whether * the slice is empty or not. * @param self The slice to operate on. * @return The length of the slice in runes. */ function len(slice self) internal returns (uint) { // Starting at ptr-31 means the LSB will be the byte we care about var ptr = self._ptr - 31; var end = ptr + self._len; uint len; for (len = 0; ptr < end; len++) { uint8 b; assembly { b := and(mload(ptr), 0xFF) } if (b < 0x80) { ptr += 1; } else if(b < 0xE0) { ptr += 2; } else if(b < 0xF0) { ptr += 3; } else if(b < 0xF8) { ptr += 4; } else if(b < 0xFC) { ptr += 5; } else { ptr += 6; } } return len; } /* * @dev Returns true if the slice is empty (has a length of 0). * @param self The slice to operate on. * @return True if the slice is empty, False otherwise. */ function empty(slice self) internal returns (bool) { return self._len == 0; } /* * @dev Returns a positive number if `other` comes lexicographically after * `self`, a negative number if it comes before, or zero if the * contents of the two slices are equal. Comparison is done per-rune, * on unicode codepoints. * @param self The first slice to compare. * @param other The second slice to compare. * @return The result of the comparison. */ function compare(slice self, slice other) internal returns (int) { uint shortest = self._len; if (other._len < self._len) shortest = other._len; var selfptr = self._ptr; var otherptr = other._ptr; for (uint idx = 0; idx < shortest; idx += 32) { uint a; uint b; assembly { a := mload(selfptr) b := mload(otherptr) } if (a != b) { // Mask out irrelevant bytes and check again uint mask = ~(2 ** (8 * (32 - shortest + idx)) - 1); var diff = (a & mask) - (b & mask); if (diff != 0) return int(diff); } selfptr += 32; otherptr += 32; } return int(self._len) - int(other._len); } /* * @dev Returns true if the two slices contain the same text. * @param self The first slice to compare. * @param self The second slice to compare. * @return True if the slices are equal, false otherwise. */ function equals(slice self, slice other) internal returns (bool) { return compare(self, other) == 0; } /* * @dev Extracts the first rune in the slice into `rune`, advancing the * slice to point to the next rune and returning `self`. * @param self The slice to operate on. * @param rune The slice that will contain the first rune. * @return `rune`. */ function nextRune(slice self, slice rune) internal returns (slice) { rune._ptr = self._ptr; if (self._len == 0) { rune._len = 0; return rune; } uint len; uint b; // Load the first byte of the rune into the LSBs of b assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) } if (b < 0x80) { len = 1; } else if(b < 0xE0) { len = 2; } else if(b < 0xF0) { len = 3; } else { len = 4; } // Check for truncated codepoints if (len > self._len) { rune._len = self._len; self._ptr += self._len; self._len = 0; return rune; } self._ptr += len; self._len -= len; rune._len = len; return rune; } /* * @dev Returns the first rune in the slice, advancing the slice to point * to the next rune. * @param self The slice to operate on. * @return A slice containing only the first rune from `self`. */ function nextRune(slice self) internal returns (slice ret) { nextRune(self, ret); } /* * @dev Returns the number of the first codepoint in the slice. * @param self The slice to operate on. * @return The number of the first codepoint in the slice. */ function ord(slice self) internal returns (uint ret) { if (self._len == 0) { return 0; } uint word; uint len; uint div = 2 ** 248; // Load the rune into the MSBs of b assembly { word:= mload(mload(add(self, 32))) } var b = word / div; if (b < 0x80) { ret = b; len = 1; } else if(b < 0xE0) { ret = b & 0x1F; len = 2; } else if(b < 0xF0) { ret = b & 0x0F; len = 3; } else { ret = b & 0x07; len = 4; } // Check for truncated codepoints if (len > self._len) { return 0; } for (uint i = 1; i < len; i++) { div = div / 256; b = (word / div) & 0xFF; if (b & 0xC0 != 0x80) { // Invalid UTF-8 sequence return 0; } ret = (ret * 64) | (b & 0x3F); } return ret; } /* * @dev Returns the keccak-256 hash of the slice. * @param self The slice to hash. * @return The hash of the slice. */ function keccak(slice self) internal returns (bytes32 ret) { assembly { ret := keccak256(mload(add(self, 32)), mload(self)) } } /* * @dev Returns true if `self` starts with `needle`. * @param self The slice to operate on. * @param needle The slice to search for. * @return True if the slice starts with the provided text, false otherwise. */ function startsWith(slice self, slice needle) internal returns (bool) { if (self._len < needle._len) { return false; } if (self._ptr == needle._ptr) { return true; } bool equal; assembly { let len := mload(needle) let selfptr := mload(add(self, 0x20)) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) } return equal; } /* * @dev If `self` starts with `needle`, `needle` is removed from the * beginning of `self`. Otherwise, `self` is unmodified. * @param self The slice to operate on. * @param needle The slice to search for. * @return `self` */ function beyond(slice self, slice needle) internal returns (slice) { if (self._len < needle._len) { return self; } bool equal = true; if (self._ptr != needle._ptr) { assembly { let len := mload(needle) let selfptr := mload(add(self, 0x20)) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) } } if (equal) { self._len -= needle._len; self._ptr += needle._len; } return self; } /* * @dev Returns true if the slice ends with `needle`. * @param self The slice to operate on. * @param needle The slice to search for. * @return True if the slice starts with the provided text, false otherwise. */ function endsWith(slice self, slice needle) internal returns (bool) { if (self._len < needle._len) { return false; } var selfptr = self._ptr + self._len - needle._len; if (selfptr == needle._ptr) { return true; } bool equal; assembly { let len := mload(needle) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) } return equal; } /* * @dev If `self` ends with `needle`, `needle` is removed from the * end of `self`. Otherwise, `self` is unmodified. * @param self The slice to operate on. * @param needle The slice to search for. * @return `self` */ function until(slice self, slice needle) internal returns (slice) { if (self._len < needle._len) { return self; } var selfptr = self._ptr + self._len - needle._len; bool equal = true; if (selfptr != needle._ptr) { assembly { let len := mload(needle) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, len), keccak256(needleptr, len)) } } if (equal) { self._len -= needle._len; } return self; } // Returns the memory address of the first byte of the first occurrence of // `needle` in `self`, or the first byte after `self` if not found. function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { uint ptr; uint idx; if (needlelen <= selflen) { if (needlelen <= 32) { // Optimized assembly for 68 gas per byte on short strings assembly { let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) let needledata := and(mload(needleptr), mask) let end := add(selfptr, sub(selflen, needlelen)) ptr := selfptr loop: jumpi(exit, eq(and(mload(ptr), mask), needledata)) ptr := add(ptr, 1) jumpi(loop, lt(sub(ptr, 1), end)) ptr := add(selfptr, selflen) exit: } return ptr; } else { // For long needles, use hashing bytes32 hash; assembly { hash := keccak256(needleptr, needlelen) } ptr = selfptr; for (idx = 0; idx <= selflen - needlelen; idx++) { bytes32 testHash; assembly { testHash := keccak256(ptr, needlelen) } if (hash == testHash) return ptr; ptr += 1; } } } return selfptr + selflen; } // Returns the memory address of the first byte after the last occurrence of // `needle` in `self`, or the address of `self` if not found. function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { uint ptr; if (needlelen <= selflen) { if (needlelen <= 32) { // Optimized assembly for 69 gas per byte on short strings assembly { let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) let needledata := and(mload(needleptr), mask) ptr := add(selfptr, sub(selflen, needlelen)) loop: jumpi(ret, eq(and(mload(ptr), mask), needledata)) ptr := sub(ptr, 1) jumpi(loop, gt(add(ptr, 1), selfptr)) ptr := selfptr jump(exit) ret: ptr := add(ptr, needlelen) exit: } return ptr; } else { // For long needles, use hashing bytes32 hash; assembly { hash := keccak256(needleptr, needlelen) } ptr = selfptr + (selflen - needlelen); while (ptr >= selfptr) { bytes32 testHash; assembly { testHash := keccak256(ptr, needlelen) } if (hash == testHash) return ptr + needlelen; ptr -= 1; } } } return selfptr; } /* * @dev Modifies `self` to contain everything from the first occurrence of * `needle` to the end of the slice. `self` is set to the empty slice * if `needle` is not found. * @param self The slice to search and modify. * @param needle The text to search for. * @return `self`. */ function find(slice self, slice needle) internal returns (slice) { uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); self._len -= ptr - self._ptr; self._ptr = ptr; return self; } /* * @dev Modifies `self` to contain the part of the string from the start of * `self` to the end of the first occurrence of `needle`. If `needle` * is not found, `self` is set to the empty slice. * @param self The slice to search and modify. * @param needle The text to search for. * @return `self`. */ function rfind(slice self, slice needle) internal returns (slice) { uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); self._len = ptr - self._ptr; return self; } /* * @dev Splits the slice, setting `self` to everything after the first * occurrence of `needle`, and `token` to everything before it. If * `needle` does not occur in `self`, `self` is set to the empty slice, * and `token` is set to the entirety of `self`. * @param self The slice to split. * @param needle The text to search for in `self`. * @param token An output parameter to which the first token is written. * @return `token`. */ function split(slice self, slice needle, slice token) internal returns (slice) { uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); token._ptr = self._ptr; token._len = ptr - self._ptr; if (ptr == self._ptr + self._len) { // Not found self._len = 0; } else { self._len -= token._len + needle._len; self._ptr = ptr + needle._len; } return token; } /* * @dev Splits the slice, setting `self` to everything after the first * occurrence of `needle`, and returning everything before it. If * `needle` does not occur in `self`, `self` is set to the empty slice, * and the entirety of `self` is returned. * @param self The slice to split. * @param needle The text to search for in `self`. * @return The part of `self` up to the first occurrence of `delim`. */ function split(slice self, slice needle) internal returns (slice token) { split(self, needle, token); } /* * @dev Splits the slice, setting `self` to everything before the last * occurrence of `needle`, and `token` to everything after it. If * `needle` does not occur in `self`, `self` is set to the empty slice, * and `token` is set to the entirety of `self`. * @param self The slice to split. * @param needle The text to search for in `self`. * @param token An output parameter to which the first token is written. * @return `token`. */ function rsplit(slice self, slice needle, slice token) internal returns (slice) { uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); token._ptr = ptr; token._len = self._len - (ptr - self._ptr); if (ptr == self._ptr) { // Not found self._len = 0; } else { self._len -= token._len + needle._len; } return token; } /* * @dev Splits the slice, setting `self` to everything before the last * occurrence of `needle`, and returning everything after it. If * `needle` does not occur in `self`, `self` is set to the empty slice, * and the entirety of `self` is returned. * @param self The slice to split. * @param needle The text to search for in `self`. * @return The part of `self` after the last occurrence of `delim`. */ function rsplit(slice self, slice needle) internal returns (slice token) { rsplit(self, needle, token); } /* * @dev Counts the number of nonoverlapping occurrences of `needle` in `self`. * @param self The slice to search. * @param needle The text to search for in `self`. * @return The number of occurrences of `needle` found in `self`. */ function count(slice self, slice needle) internal returns (uint count) { uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len; while (ptr <= self._ptr + self._len) { count++; ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len; } } /* * @dev Returns True if `self` contains `needle`. * @param self The slice to search. * @param needle The text to search for in `self`. * @return True if `needle` is found in `self`, false otherwise. */ function contains(slice self, slice needle) internal returns (bool) { return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr; } /* * @dev Returns a newly allocated string containing the concatenation of * `self` and `other`. * @param self The first slice to concatenate. * @param other The second slice to concatenate. * @return The concatenation of the two strings. */ function concat(slice self, slice other) internal returns (string) { var ret = new string(self._len + other._len); uint retptr; assembly { retptr := add(ret, 32) } memcpy(retptr, self._ptr, self._len); memcpy(retptr + self._len, other._ptr, other._len); return ret; } /* * @dev Joins an array of slices, using `self` as a delimiter, returning a * newly allocated string. * @param self The delimiter to use. * @param parts A list of slices to join. * @return A newly allocated string containing all the slices in `parts`, * joined with `self`. */ function join(slice self, slice[] parts) internal returns (string) { if (parts.length == 0) return ""; uint len = self._len * (parts.length - 1); for(uint i = 0; i < parts.length; i++) len += parts[i]._len; var ret = new string(len); uint retptr; assembly { retptr := add(ret, 32) } for(uint i = 0; i < parts.length; i++) { memcpy(retptr, parts[i]._ptr, parts[i]._len); retptr += parts[i]._len; if (i < parts.length - 1) { memcpy(retptr, self._ptr, self._len); retptr += self._len; } } return ret; } }