jiff/util/array_str.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
/// A simple and not the most-efficient fixed size string on the stack.
///
/// This supplanted some uses of `Box<str>` for storing tiny strings in an
/// effort to reduce our dependence on dynamic memory allocation.
///
/// Also, since it isn't needed and it lets us save on storage requirements,
/// `N` must be less than `256` (so that the length can fit in a `u8`).
#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)]
pub(crate) struct ArrayStr<const N: usize> {
/// The UTF-8 bytes that make up the string.
///
/// This array---the entire array---is always valid UTF-8. And
/// the `0..self.len` sub-slice is also always valid UTF-8.
bytes: [u8; N],
/// The number of bytes used by the string in `bytes`.
///
/// (We could technically save this byte in some cases and use a NUL
/// terminator. For example, since we don't permit NUL bytes in POSIX time
/// zone abbreviation strings, but this is simpler and only one byte and
/// generalizes. And we're not really trying to micro-optimize the storage
/// requirements when we use these array strings. Or at least, I don't know
/// of a reason to.)
len: u8,
}
impl<const N: usize> ArrayStr<N> {
/// Creates a new fixed capacity string.
///
/// If the given string exceeds `N` bytes, then this returns
/// `None`.
pub(crate) fn new(s: &str) -> Option<ArrayStr<N>> {
let len = s.len();
if len > N {
return None;
}
let mut bytes = [0; N];
bytes[..len].copy_from_slice(s.as_bytes());
// OK because we don't ever use anything bigger than u8::MAX for `N`.
// And we probably shouldn't, because that would be a pretty chunky
// array. If such a thing is needed, please file an issue to discuss.
debug_assert!(
N <= usize::from(u8::MAX),
"size of ArrayStr is too big"
);
let len = u8::try_from(len).unwrap();
Some(ArrayStr { bytes, len })
}
/// Returns the capacity of this fixed string.
pub(crate) const fn capacity() -> usize {
N
}
/// Append the bytes given to the end of this string.
///
/// If the capacity would be exceeded, then this is a no-op and `false`
/// is returned.
pub(crate) fn push_str(&mut self, s: &str) -> bool {
let len = usize::from(self.len);
let Some(new_len) = len.checked_add(s.len()) else { return false };
if new_len > N {
return false;
}
self.bytes[len..new_len].copy_from_slice(s.as_bytes());
// OK because we don't ever use anything bigger than u8::MAX for `N`.
// And we probably shouldn't, because that would be a pretty chunky
// array. If such a thing is needed, please file an issue to discuss.
debug_assert!(
N <= usize::from(u8::MAX),
"size of ArrayStr is too big"
);
self.len = u8::try_from(new_len).unwrap();
true
}
/// Returns this array string as a string slice.
pub(crate) fn as_str(&self) -> &str {
// OK because construction guarantees valid UTF-8.
//
// This is bullet proof enough to use unchecked `str` construction
// here, but I can't dream up of a benchmark where it matters.
core::str::from_utf8(&self.bytes[..usize::from(self.len)]).unwrap()
}
}
/// Easy construction of `ArrayStr` from `&'static str`.
///
/// We specifically limit to `&'static str` to approximate string literals.
/// This prevents most cases of accidentally creating a non-string literal
/// that panics if the string is too big.
///
/// This impl primarily exists to make writing tests more convenient.
impl<const N: usize> From<&'static str> for ArrayStr<N> {
fn from(s: &'static str) -> ArrayStr<N> {
ArrayStr::new(s).unwrap()
}
}
impl<const N: usize> PartialEq<str> for ArrayStr<N> {
fn eq(&self, rhs: &str) -> bool {
self.as_str() == rhs
}
}
impl<const N: usize> PartialEq<&str> for ArrayStr<N> {
fn eq(&self, rhs: &&str) -> bool {
self.as_str() == *rhs
}
}
impl<const N: usize> PartialEq<ArrayStr<N>> for str {
fn eq(&self, rhs: &ArrayStr<N>) -> bool {
self == rhs.as_str()
}
}
impl<const N: usize> core::fmt::Debug for ArrayStr<N> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
core::fmt::Debug::fmt(self.as_str(), f)
}
}
impl<const N: usize> core::fmt::Display for ArrayStr<N> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
core::fmt::Display::fmt(self.as_str(), f)
}
}
impl<const N: usize> core::fmt::Write for ArrayStr<N> {
fn write_str(&mut self, s: &str) -> core::fmt::Result {
if self.push_str(s) {
Ok(())
} else {
Err(core::fmt::Error)
}
}
}
/// A self-imposed limit on the size of a time zone abbreviation, in bytes.
///
/// POSIX says this:
///
/// > Indicate no less than three, nor more than {TZNAME_MAX}, bytes that are
/// > the designation for the standard (std) or the alternative (dst -such as
/// > Daylight Savings Time) timezone.
///
/// But it doesn't seem worth the trouble to query `TZNAME_MAX`. Interestingly,
/// IANA says:
///
/// > are 3 or more characters specifying the standard and daylight saving time
/// > (DST) zone abbreviations
///
/// Which implies that IANA thinks there is no limit. But that seems unwise.
/// Moreover, in practice, it seems like the `date` utility supports fairly
/// long abbreviations. On my mac (so, BSD `date` as I understand it):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 20:05:58 YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// And on my Linux machine (so, GNU `date`):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 08:05:36 PM YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// I don't know exactly what limit these programs use, but 30 seems good
/// enough?
///
/// (Previously, I had been using 255 and stuffing the string in a `Box<str>`.
/// But as part of work on [#168], I was looking to remove allocation from as
/// many places as possible. And this was one candidate. But making room on the
/// stack for 255 byte abbreviations seemed gratuitous. So I picked something
/// smaller. If we come across an abbreviation bigger than this max, then we'll
/// error.)
///
/// [#168]: https://github.com/BurntSushi/jiff/issues/168
const ABBREVIATION_MAX: usize = 30;
/// A type alias for centralizing the definition of a time zone abbreviation.
///
/// Basically, this creates one single coherent place where we control the
/// length of a time zone abbreviation.
pub(crate) type Abbreviation = ArrayStr<ABBREVIATION_MAX>;
#[cfg(test)]
mod tests {
use core::fmt::Write;
use super::*;
#[test]
fn fmt_write() {
let mut dst = ArrayStr::<5>::new("").unwrap();
assert!(write!(&mut dst, "abcd").is_ok());
assert!(write!(&mut dst, "e").is_ok());
assert!(write!(&mut dst, "f").is_err());
}
}