mownstr/
lib.rs

//! [`MownStr`]
//! is either a borrowed reference to a `str` or an owned `Box<str>`.
3
4use std::borrow::Cow;
5use std::fmt;
6use std::hash;
7use std::marker::PhantomData;
8use std::ops::Deref;
9use std::ptr::NonNull;
10use std::slice;
11use std::str;
12
/// "Maybe own str":
/// either a borrowed reference to a `str` or an owned `Box<str>`.
///
/// It does not try to be mutable, nor generic,
/// which makes it lighter than, for example, `Cow<str>`.
///
/// # Panics
/// The drawback is that `MownStr`
/// does not support strings with a length > `usize::MAX/2`,
/// because the highest bit of the stored length marks an owned string.
/// In debug builds, converting such a large string to a `MownStr` panics
/// (the constructors check the length with `debug_assert!`);
/// otherwise, it would lead to a memory leak
/// (but is extremely unlikely in practice anyway).
pub struct MownStr<'a> {
    /// Address of the first byte of the string data.
    addr: NonNull<u8>,
    /// Length of the string in bytes,
    /// with the highest bit set when the data is owned.
    xlen: usize,
    /// Ties a borrowed `MownStr` to the lifetime of the `str` it was built from.
    _phd: PhantomData<&'a str>,
}
29
30// MownStr does not implement `Sync` and `Send` by default,
31// because NonNull<u8> does not.
32// However, it is safe to declare it as Sync and Send,
33// because MownStr is basically nothing more than a `&str`,
34// or a `Box<str>`, and both are `Sync` and `Send`.
35unsafe impl Sync for MownStr<'_> {}
36unsafe impl Send for MownStr<'_> {}
37
/// Flag marking an owned string: the most significant bit of a `usize`.
const OWN_FLAG: usize = 1 << (usize::BITS - 1);
/// Mask selecting every bit except `OWN_FLAG`, i.e. the actual length.
const LEN_MASK: usize = !OWN_FLAG;
40
41impl<'a> MownStr<'a> {
42    #[deprecated = "use from_ref instead. This method caused confusion with FromStr::from_str."]
43    #[must_use]
44    pub const fn from_str(other: &'a str) -> Self {
45        Self::from_ref(other)
46    }
47
48    #[must_use]
49    pub const fn from_ref(other: &'a str) -> Self {
50        debug_assert!(other.len() <= LEN_MASK);
51        // NB: The only 'const' constructor for NonNull is new_unchecked
52        // so we need an unsafe block.
53
54        // SAFETY: we need a *mut u8 for new_unchecked,
55        //         but MownStr will never mutate its content
56        let ptr = other.as_ptr().cast_mut();
57        let addr = unsafe {
58            // SAFETY: ptr can not be null,
59            NonNull::new_unchecked(ptr)
60        };
61        MownStr {
62            addr,
63            xlen: other.len(),
64            _phd: PhantomData,
65        }
66    }
67
68    #[must_use]
69    pub const fn is_borrowed(&self) -> bool {
70        (self.xlen & OWN_FLAG) == 0
71    }
72
73    #[must_use]
74    pub const fn is_owned(&self) -> bool {
75        (self.xlen & OWN_FLAG) == OWN_FLAG
76    }
77
78    #[must_use]
79    pub const fn borrowed(&self) -> MownStr {
80        MownStr {
81            addr: self.addr,
82            xlen: self.xlen & LEN_MASK,
83            _phd: PhantomData,
84        }
85    }
86
87    #[inline]
88    const fn real_len(&self) -> usize {
89        self.xlen & LEN_MASK
90    }
91
92    #[inline]
93    unsafe fn make_ref(&self) -> &'a str {
94        debug_assert!(self.is_borrowed(), "make_ref() called on owned MownStr");
95        let ptr = self.addr.as_ptr();
96        let slice = slice::from_raw_parts(ptr, self.xlen);
97        str::from_utf8_unchecked(slice)
98    }
99
100    /// Convert an *owned* `MownStr` to a box.
101    //
102    // NB: conceptually this method consumes the Mownstr.
103    // The reason why self is a mutable ref instead of a move is purely technical
104    // (to make it usable in Drop::drop()).
105    #[inline]
106    unsafe fn extract_box(&mut self) -> Box<str> {
107        debug_assert!(self.is_owned(), "extract_box() called on borrowed MownStr");
108        // extract data to make box
109        let ptr = self.addr.as_ptr();
110        let len = self.real_len();
111        // turn to borrowed, to avoid double-free
112        self.xlen = 0;
113        debug_assert!(self.is_borrowed());
114        // make box
115        let slice = slice::from_raw_parts_mut(ptr, len);
116        let raw = str::from_utf8_unchecked_mut(slice) as *mut str;
117        Box::from_raw(raw)
118    }
119}
120
121impl Drop for MownStr<'_> {
122    fn drop(&mut self) {
123        if self.is_owned() {
124            unsafe {
125                std::mem::drop(self.extract_box());
126            }
127        }
128    }
129}
130
131impl Clone for MownStr<'_> {
132    fn clone(&self) -> Self {
133        if self.is_owned() {
134            Box::<str>::from(&**self).into()
135        } else {
136            MownStr {
137                addr: self.addr,
138                xlen: self.xlen,
139                _phd: self._phd,
140            }
141        }
142    }
143}
144
145// Construct a MownStr
146
147impl<'a> From<&'a str> for MownStr<'a> {
148    fn from(other: &'a str) -> Self {
149        Self::from_ref(other)
150    }
151}
152
153impl From<Box<str>> for MownStr<'_> {
154    fn from(other: Box<str>) -> Self {
155        let len = other.len();
156        debug_assert!(len <= LEN_MASK);
157        let addr = Box::leak(other).as_mut_ptr();
158        let addr = unsafe {
159            // SAFETY: ptr can not be null,
160            NonNull::new_unchecked(addr)
161        };
162
163        let xlen = len | OWN_FLAG;
164        MownStr {
165            addr,
166            xlen,
167            _phd: PhantomData,
168        }
169    }
170}
171
172impl From<String> for MownStr<'_> {
173    fn from(other: String) -> Self {
174        other.into_boxed_str().into()
175    }
176}
177
178impl<'a> From<Cow<'a, str>> for MownStr<'a> {
179    fn from(other: Cow<'a, str>) -> Self {
180        match other {
181            Cow::Borrowed(r) => r.into(),
182            Cow::Owned(s) => s.into(),
183        }
184    }
185}
186
187// Using a MownStr as a str
188
189impl Deref for MownStr<'_> {
190    type Target = str;
191
192    fn deref(&self) -> &str {
193        let ptr = self.addr.as_ptr();
194        let len = self.real_len();
195        unsafe {
196            let slice = slice::from_raw_parts(ptr, len);
197            str::from_utf8_unchecked(slice)
198        }
199    }
200}
201
202impl AsRef<str> for MownStr<'_> {
203    fn as_ref(&self) -> &str {
204        self
205    }
206}
207
208impl std::borrow::Borrow<str> for MownStr<'_> {
209    fn borrow(&self) -> &str {
210        self
211    }
212}
213
214// Comparing between MownStr
215
216impl hash::Hash for MownStr<'_> {
217    fn hash<H: hash::Hasher>(&self, state: &mut H) {
218        self.deref().hash(state);
219    }
220}
221
222impl PartialEq for MownStr<'_> {
223    fn eq(&self, other: &Self) -> bool {
224        **self == **other
225    }
226}
227
228impl Eq for MownStr<'_> {}
229
230impl PartialOrd for MownStr<'_> {
231    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
232        Some(self.cmp(other))
233    }
234}
235
236impl Ord for MownStr<'_> {
237    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
238        self.deref().cmp(&**other)
239    }
240}
241
242// Comparing MownStr with str
243
244impl<'a> PartialEq<&'a str> for MownStr<'a> {
245    fn eq(&self, other: &&'a str) -> bool {
246        &**self == *other
247    }
248}
249
250impl<'a> PartialOrd<&'a str> for MownStr<'a> {
251    fn partial_cmp(&self, other: &&'a str) -> Option<std::cmp::Ordering> {
252        self.deref().partial_cmp(*other)
253    }
254}
255
256impl<'a> PartialEq<MownStr<'a>> for &'a str {
257    fn eq(&self, other: &MownStr<'a>) -> bool {
258        self == &&**other
259    }
260}
261
262impl<'a> PartialOrd<MownStr<'a>> for &'a str {
263    fn partial_cmp(&self, other: &MownStr<'a>) -> Option<std::cmp::Ordering> {
264        self.partial_cmp(&&**other)
265    }
266}
267
268// Formatting
269
270impl fmt::Debug for MownStr<'_> {
271    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
272        fmt::Debug::fmt(&**self, f)
273    }
274}
275
276impl fmt::Display for MownStr<'_> {
277    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
278        fmt::Display::fmt(&**self, f)
279    }
280}
281
282// Converting
283
284impl<'a> From<MownStr<'a>> for Box<str> {
285    fn from(other: MownStr<'a>) -> Self {
286        other.to()
287    }
288}
289
290impl<'a> From<MownStr<'a>> for String {
291    fn from(other: MownStr<'a>) -> Self {
292        other.to()
293    }
294}
295
296impl<'a> From<MownStr<'a>> for Cow<'a, str> {
297    fn from(other: MownStr<'a>) -> Self {
298        if other.is_owned() {
299            other.to::<String>().into()
300        } else {
301            unsafe { other.make_ref() }.into()
302        }
303    }
304}
305
306impl<'a> MownStr<'a> {
307    /// Convert this `MownStr` to any type `T`
308    /// that can be created from either a `&str` or a `Box<str>`.
309    ///
310    /// This can not be implemented with the `From` trait,
311    /// because this would conflict with `From<MownStr<'a>>`.
312    ///
313    /// # Usage
314    /// ```
315    /// # use mownstr::MownStr;
316    /// # use std::rc::Rc;
317    /// let ms = MownStr::from("hello world");
318    /// let rc = ms.to::<Rc<str>>();
319    ///
320    /// let o1 = Some(MownStr::from("hi there"));
321    /// let o2 = o1.map(MownStr::to::<Rc<str>>);
322    /// ```
323    #[must_use]
324    pub fn to<T>(mut self) -> T
325    where
326        T: From<&'a str> + From<Box<str>>,
327    {
328        if self.is_owned() {
329            unsafe { self.extract_box() }.into()
330        } else {
331            unsafe { self.make_ref() }.into()
332        }
333    }
334}
335
#[cfg(test)]
// Several tests deliberately compare a value with itself.
#[allow(clippy::eq_op)]
mod test {
    use super::MownStr;
    use std::borrow::Cow;
    use std::collections::HashSet;

    /// A `MownStr` must be exactly as big as a `&str`.
    #[test]
    fn size() {
        assert_eq!(
            std::mem::size_of::<MownStr<'static>>(),
            std::mem::size_of::<&'static str>(),
        );
    }

    /// Wrapping a `MownStr` in an `Option` must not make it bigger.
    #[test]
    fn niche() {
        assert_eq!(
            std::mem::size_of::<MownStr<'static>>(),
            std::mem::size_of::<Option<MownStr<'static>>>(),
        );
    }

    #[test]
    fn empty_string() {
        let empty = String::new();
        let _ = MownStr::from(empty);
    }

    #[test]
    fn build_borrowed_empty() {
        let mown: MownStr = "".into();
        assert!(mown.is_borrowed());
        assert_eq!(mown, "");
    }

    #[test]
    fn build_borrowed() {
        let mown: MownStr = "hello".into();
        assert!(mown.is_borrowed());
    }

    #[test]
    fn build_owned_from_box() {
        let bx: Box<str> = "hello".into();
        let mown: MownStr = bx.into();
        assert!(mown.is_owned());
    }

    #[test]
    fn build_owned_from_string() {
        let mown: MownStr = "hello".to_string().into();
        assert!(mown.is_owned());
    }

    #[test]
    fn build_borrowed_from_cow() {
        let mown: MownStr = Cow::Borrowed("hello").into();
        assert!(mown.is_borrowed());
    }

    #[test]
    fn build_owned_from_cow() {
        let mown: MownStr = Cow::<str>::Owned("hello".to_string()).into();
        assert!(mown.is_owned());
    }

    #[test]
    fn borrowed() {
        let mown1: MownStr = "hello".to_string().into();
        let mown2 = mown1.borrowed();
        assert!(mown2.is_borrowed());
        assert_eq!(mown1, mown2);
    }

    #[test]
    fn deref() {
        let txt = "hello";
        let mown1: MownStr = txt.into();
        assert_eq!(&*mown1, txt);
        assert_eq!(&mown1[..], txt);
        let mown2: MownStr = txt.to_string().into();
        assert_eq!(&*mown2, txt);
        assert_eq!(&mown2[..], txt);
    }

    #[test]
    fn hash() {
        let txt = "hello";
        let mown1: MownStr = txt.into();
        let mown2: MownStr = txt.to_string().into();

        let mut set = HashSet::new();
        set.insert(mown1.clone());
        assert!(set.contains(&mown1));
        assert!(set.contains(&mown2));
        assert!(set.contains(txt));

        let mut set = HashSet::new();
        set.insert(mown2.clone());
        assert!(set.contains(&mown1));
        assert!(set.contains(&mown2));
        assert!(set.contains(txt));
    }

    #[test]
    fn eq() {
        let txt = "hello";
        let mown1: MownStr = txt.into();
        let mown2: MownStr = txt.to_string().into();

        assert_eq!(mown1, txt);
        assert_eq!(mown1, mown1);
        assert_eq!(mown1, mown2);
        assert_eq!(mown2, txt);
        assert_eq!(mown2, mown1);
        assert_eq!(mown2, mown2);
        assert_eq!(txt, mown1);
        assert_eq!(txt, mown2);
    }

    #[test]
    fn order() {
        let txt = "hello";
        let mown1: MownStr = txt[..4].into();
        let mown2: MownStr = txt[..3].to_string().into();

        assert!(mown1 <= txt);
        assert!(mown1 <= mown1);
        assert!(mown1 >= mown2);
        assert!(mown2 <= txt);
        assert!(mown2 <= mown1);
        assert!(mown2 >= mown2);
        assert!(txt >= mown1);
        assert!(txt >= mown2);
    }

    #[test]
    fn display() {
        let mown1: MownStr = "hello".into();
        let mown2: MownStr = "hello".to_string().into();
        assert_eq!(format!("{mown1:?}"), "\"hello\"");
        assert_eq!(format!("{mown2:?}"), "\"hello\"");
        assert_eq!(format!("{mown1}"), "hello");
        assert_eq!(format!("{mown2}"), "hello");
    }

    #[test]
    fn no_double_free() {
        let bx = {
            let mown = MownStr::from("hello world".to_string());
            assert_eq!(&mown[..4], "hell");
            mown.to::<Box<str>>()
        };
        assert_eq!(&bx[..4], "hell");
    }

    #[cfg(target_os = "linux")]
    #[test]
    fn no_memory_leak() {
        const CAP: usize = 100_000_000;

        /// Read the `RssAnon` value (in kB) of the current process.
        fn get_rss_anon() -> usize {
            if cfg!(miri) {
                return 0; // return dummy value, as miri can not open files
            }
            let txt = std::fs::read_to_string("/proc/self/status").expect("read proc status");
            let txt = txt.split("RssAnon:").nth(1).unwrap();
            let txt = txt.split(" kB").next().unwrap();
            let txt = txt.trim();
            txt.parse().unwrap()
        }
        // performs several MownStr allocations in sequence,
        // dropping each one before allocating the next one
        // (unless the v.pop() line below is commented out).
        //
        // If there is no memory leak,
        // the increase in memory should be roughly 1 time the allocated size;
        // otherwise, it should be roughly 10 times that size.
        //
        // NB: in miri, the value returned by get_rss_anon is fake,
        // so no memory leak will ever be detected;
        // but the test is still executed in miri to detect UB.

        let m0 = get_rss_anon();
        println!("memory = {m0} kB");
        let mut v = vec![];
        for i in 0..10 {
            v.pop(); // COMMENT THIS LINE OUT to simulate a memory leak
            let s = unsafe { String::from_utf8_unchecked(vec![b'a' + i; CAP]) };
            v.push(MownStr::from(s));
            println!(
                "{} MownStr(s) in the Vec, of len {}, starting with {:?}",
                v.len(),
                v[v.len() - 1].len(),
                &v[v.len() - 1][..2]
            );
        }
        let m1 = get_rss_anon();
        println!("memory = {m1} kB");
        assert!(!v.is_empty()); // ensure that v is not optimized away too soon
        // The measure is process-wide, so it may have decreased in the meantime;
        // saturate instead of panicking on an underflow.
        let increase = m1.saturating_sub(m0) as f64 / (CAP / 1000) as f64;
        println!("increase = {increase}");
        assert!(increase < 1.5);
    }
}
541}