mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-02-11 05:31:09 +00:00
248 lines
8.4 KiB
Rust
248 lines
8.4 KiB
Rust
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
|
|
// file at the top-level directory of this distribution and at
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
use super::UnicodeSegmentation;
|
|
|
|
use std::prelude::v1::*;
|
|
|
|
#[test]
|
|
fn test_graphemes() {
|
|
use crate::testdata::{TEST_DIFF, TEST_SAME};
|
|
|
|
pub const EXTRA_DIFF: &'static [(
|
|
&'static str,
|
|
&'static [&'static str],
|
|
&'static [&'static str],
|
|
)] = &[
|
|
// Official test suite doesn't include two Prepend chars between two other chars.
|
|
(
|
|
"\u{20}\u{600}\u{600}\u{20}",
|
|
&["\u{20}", "\u{600}\u{600}\u{20}"],
|
|
&["\u{20}", "\u{600}", "\u{600}", "\u{20}"],
|
|
),
|
|
// Test for Prepend followed by two Any chars
|
|
(
|
|
"\u{600}\u{20}\u{20}",
|
|
&["\u{600}\u{20}", "\u{20}"],
|
|
&["\u{600}", "\u{20}", "\u{20}"],
|
|
),
|
|
];
|
|
|
|
pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[
|
|
// family emoji (more than two emoji joined by ZWJ)
|
|
(
|
|
"\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
|
|
&["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"],
|
|
),
|
|
// cartwheel emoji followed by two fitzpatrick skin tone modifiers
|
|
// (test case from issue #19)
|
|
(
|
|
"\u{1F938}\u{1F3FE}\u{1F3FE}",
|
|
&["\u{1F938}\u{1F3FE}\u{1F3FE}"],
|
|
),
|
|
];
|
|
|
|
for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
|
|
// test forward iterator
|
|
assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
|
|
assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
|
|
|
|
// test reverse iterator
|
|
assert!(UnicodeSegmentation::graphemes(s, true)
|
|
.rev()
|
|
.eq(g.iter().rev().cloned()));
|
|
assert!(UnicodeSegmentation::graphemes(s, false)
|
|
.rev()
|
|
.eq(g.iter().rev().cloned()));
|
|
}
|
|
|
|
for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
|
|
// test forward iterator
|
|
assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned()));
|
|
assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned()));
|
|
|
|
// test reverse iterator
|
|
assert!(UnicodeSegmentation::graphemes(s, true)
|
|
.rev()
|
|
.eq(gt.iter().rev().cloned()));
|
|
assert!(UnicodeSegmentation::graphemes(s, false)
|
|
.rev()
|
|
.eq(gf.iter().rev().cloned()));
|
|
}
|
|
|
|
// test the indices iterators
|
|
let s = "a̐éö̲\r\n";
|
|
let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
|
|
let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
|
|
assert_eq!(gr_inds, b);
|
|
let gr_inds = UnicodeSegmentation::grapheme_indices(s, true)
|
|
.rev()
|
|
.collect::<Vec<(usize, &str)>>();
|
|
let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")];
|
|
assert_eq!(gr_inds, b);
|
|
let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
|
|
{
|
|
let gr_inds = gr_inds_iter.by_ref();
|
|
let e1 = gr_inds.size_hint();
|
|
assert_eq!(e1, (1, Some(13)));
|
|
let c = gr_inds.count();
|
|
assert_eq!(c, 4);
|
|
}
|
|
let e2 = gr_inds_iter.size_hint();
|
|
assert_eq!(e2, (0, Some(0)));
|
|
|
|
// make sure the reverse iterator does the right thing with "\n" at beginning of string
|
|
let s = "\n\r\n\r";
|
|
let gr = UnicodeSegmentation::graphemes(s, true)
|
|
.rev()
|
|
.collect::<Vec<&str>>();
|
|
let b: &[_] = &["\r", "\r\n", "\n"];
|
|
assert_eq!(gr, b);
|
|
}
|
|
|
|
#[test]
|
|
fn test_words() {
|
|
use crate::testdata::TEST_WORD;
|
|
|
|
// Unicode's official tests don't really test longer chains of flag emoji
|
|
// TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
|
|
const EXTRA_TESTS: &'static [(&'static str, &'static [&'static str])] = &[
|
|
(
|
|
"🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦🇴",
|
|
&["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦🇴"],
|
|
),
|
|
("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]),
|
|
(
|
|
"🇦a🇫🇦🇽a🇦🇱🇩🇿🇦🇸🇦🇩🇦",
|
|
&["🇦", "a", "🇫🇦", "🇽", "a", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"],
|
|
),
|
|
(
|
|
"\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",
|
|
&["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"],
|
|
),
|
|
("😌👎🏼", &["😌", "👎🏼"]),
|
|
// perhaps wrong, spaces should not be included?
|
|
("hello world", &["hello", " ", "world"]),
|
|
("🇨🇦🇨🇭🇿🇲🇿 hi", &["🇨🇦", "🇨🇭", "🇿🇲", "🇿", " ", "hi"]),
|
|
];
|
|
for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
|
|
macro_rules! assert_ {
|
|
($test:expr, $exp:expr, $name:expr) => {
|
|
// collect into vector for better diagnostics in failure case
|
|
let testing = $test.collect::<Vec<_>>();
|
|
let expected = $exp.collect::<Vec<_>>();
|
|
assert_eq!(
|
|
testing, expected,
|
|
"{} test for testcase ({:?}, {:?}) failed.",
|
|
$name, s, w
|
|
)
|
|
};
|
|
}
|
|
// test forward iterator
|
|
assert_!(
|
|
s.split_word_bounds(),
|
|
w.iter().cloned(),
|
|
"Forward word boundaries"
|
|
);
|
|
|
|
// test reverse iterator
|
|
assert_!(
|
|
s.split_word_bounds().rev(),
|
|
w.iter().rev().cloned(),
|
|
"Reverse word boundaries"
|
|
);
|
|
|
|
// generate offsets from word string lengths
|
|
let mut indices = vec![0];
|
|
for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| {
|
|
*t += n;
|
|
Some(*t)
|
|
}) {
|
|
indices.push(i);
|
|
}
|
|
indices.pop();
|
|
let indices = indices;
|
|
|
|
// test forward indices iterator
|
|
assert_!(
|
|
s.split_word_bound_indices().map(|(l, _)| l),
|
|
indices.iter().cloned(),
|
|
"Forward word indices"
|
|
);
|
|
|
|
// test backward indices iterator
|
|
assert_!(
|
|
s.split_word_bound_indices().rev().map(|(l, _)| l),
|
|
indices.iter().rev().cloned(),
|
|
"Reverse word indices"
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn test_sentences() {
|
|
use crate::testdata::TEST_SENTENCE;
|
|
|
|
for &(s, w) in TEST_SENTENCE.iter() {
|
|
macro_rules! assert_ {
|
|
($test:expr, $exp:expr, $name:expr) => {
|
|
// collect into vector for better diagnostics in failure case
|
|
let testing = $test.collect::<Vec<_>>();
|
|
let expected = $exp.collect::<Vec<_>>();
|
|
assert_eq!(
|
|
testing, expected,
|
|
"{} test for testcase ({:?}, {:?}) failed.",
|
|
$name, s, w
|
|
)
|
|
};
|
|
}
|
|
|
|
assert_!(
|
|
s.split_sentence_bounds(),
|
|
w.iter().cloned(),
|
|
"Forward sentence boundaries"
|
|
);
|
|
}
|
|
}
|
|
|
|
quickcheck! {
|
|
fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
|
|
let a = s.graphemes(true).collect::<Vec<_>>();
|
|
let mut b = s.graphemes(true).rev().collect::<Vec<_>>();
|
|
b.reverse();
|
|
a == b
|
|
}
|
|
|
|
fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
|
|
let a = s.graphemes(false).collect::<Vec<_>>();
|
|
let mut b = s.graphemes(false).rev().collect::<Vec<_>>();
|
|
b.reverse();
|
|
a == b
|
|
}
|
|
|
|
fn quickcheck_join_graphemes(s: String) -> bool {
|
|
let a = s.graphemes(true).collect::<String>();
|
|
let b = s.graphemes(false).collect::<String>();
|
|
a == s && b == s
|
|
}
|
|
|
|
fn quickcheck_forward_reverse_words(s: String) -> bool {
|
|
let a = s.split_word_bounds().collect::<Vec<_>>();
|
|
let mut b = s.split_word_bounds().rev().collect::<Vec<_>>();
|
|
b.reverse();
|
|
a == b
|
|
}
|
|
|
|
fn quickcheck_join_words(s: String) -> bool {
|
|
let a = s.split_word_bounds().collect::<String>();
|
|
a == s
|
|
}
|
|
}
|