diff --git a/.clippy.toml b/.clippy.toml new file mode 100644 index 0000000..1e5837c --- /dev/null +++ b/.clippy.toml @@ -0,0 +1 @@ +too-many-arguments-threshold = 10 \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 9cf5b99..d024e79 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ -**/blocklist.json binary \ No newline at end of file +**/blocklist.json binary +Cargo.lock binary \ No newline at end of file diff --git a/README.md b/README.md index 0f093cc..b83c806 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # [Sqids Rust](https://sqids.org/rust) +[![Github Actions](https://img.shields.io/github/actions/workflow/status/sqids/sqids-rust/tests.yml)](https://github.com/sqids/sqids-rust/actions) + Sqids (pronounced "squids") is a small library that lets you generate YouTube-looking IDs from numbers. It's good for link shortening, fast & URL-safe ID generation and decoding back into numbers for quicker database lookups. ## Getting started diff --git a/release.toml b/release.toml new file mode 100644 index 0000000..6ff631d --- /dev/null +++ b/release.toml @@ -0,0 +1,2 @@ +consolidate-commits = false +consolidate-pushes = true \ No newline at end of file diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..b4e9813 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,13 @@ +max_width = 100 +comment_width = 100 +hard_tabs = true +edition = "2021" +reorder_imports = true +imports_granularity = "Crate" +use_small_heuristics = "Max" +wrap_comments = true +binop_separator = "Back" +trailing_comma = "Vertical" +trailing_semicolon = true +use_field_init_shorthand = true +format_macro_bodies = true \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 6bcf052..94a07a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,345 +1,326 @@ use derive_more::Display; -use std::collections::HashSet; -use std::result; +use std::{collections::HashSet, result}; #[derive(Display, Debug)] pub enum Error { - #[display(fmt = "Alphabet length must be at least 5")] - AlphabetLength, - #[display(fmt = "Alphabet must contain unique characters")] - AlphabetUniqueCharacters, - #[display(fmt = "Minimum length has to be between {min} and {max}")] - MinLength { min: usize, max: usize }, - #[display(fmt = "Encoding supports numbers between {min} and {max}")] - EncodingRange { min: u64, max: u64 }, - #[display(fmt = "Ran out of range checking against the blocklist")] - BlocklistOutOfRange, + #[display(fmt = "Alphabet length must be at least 5")] + AlphabetLength, + #[display(fmt = "Alphabet must contain unique characters")] + AlphabetUniqueCharacters, + #[display(fmt = "Minimum length has to be between {min} and {max}")] + MinLength { min: usize, max: usize }, + #[display(fmt = "Encoding supports numbers between {min} and {max}")] + EncodingRange { min: u64, max: u64 }, + #[display(fmt = "Ran out of range checking against the blocklist")] + BlocklistOutOfRange, } pub type Result = result::Result; +pub fn default_blocklist() -> HashSet { + serde_json::from_str(include_str!("blocklist.json")).unwrap() +} + #[derive(Debug)] pub struct Options { - alphabet: String, - min_length: usize, - blocklist: HashSet, + alphabet: String, + min_length: usize, + blocklist: HashSet, } impl Options { - pub fn new( - alphabet: Option, - min_length: Option, - blocklist: Option>, - ) -> Self { - let mut options = Options::default(); + pub fn new( + alphabet: Option, + min_length: Option, + blocklist: Option>, + ) -> Self { + let mut options = Options::default(); - if let Some(alphabet) = alphabet { - options.alphabet = alphabet; - } - if let Some(min_length) = min_length { - options.min_length = min_length; - } - if let Some(blocklist) = blocklist { - options.blocklist = blocklist; - } + if let Some(alphabet) = alphabet { + options.alphabet = alphabet; + } + if let Some(min_length) = min_length { + options.min_length = min_length; + } + if let Some(blocklist) = blocklist { + options.blocklist = blocklist; + } - options - } + options + } } impl Default for Options { - fn default() -> Self { - Options { - alphabet: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".to_string(), - min_length: 0, - blocklist: serde_json::from_str(include_str!("blocklist.json")).unwrap(), - } - } + fn default() -> Self { + Options { + alphabet: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".to_string(), + min_length: 0, + blocklist: default_blocklist(), + } + } } #[derive(Debug)] pub struct Sqids { - alphabet: Vec, - min_length: usize, - blocklist: HashSet, + alphabet: Vec, + min_length: usize, + blocklist: HashSet, } impl Sqids { - pub fn new(options: Option) -> Result { - let options = options.unwrap_or_default(); - let alphabet: Vec = options.alphabet.chars().collect(); + pub fn new(options: Option) -> Result { + let options = options.unwrap_or_default(); + let alphabet: Vec = options.alphabet.chars().collect(); - if alphabet.len() < 5 { - return Err(Error::AlphabetLength); - } + if alphabet.len() < 5 { + return Err(Error::AlphabetLength); + } - let unique_chars: HashSet = alphabet.iter().cloned().collect(); - if unique_chars.len() != alphabet.len() { - return Err(Error::AlphabetUniqueCharacters); - } + let unique_chars: HashSet = alphabet.iter().cloned().collect(); + if unique_chars.len() != alphabet.len() { + return Err(Error::AlphabetUniqueCharacters); + } - let filtered_blocklist: HashSet = options - .blocklist - .iter() - .filter_map(|word| { - let word = word.to_lowercase(); - if word.len() >= 3 && word.chars().all(|c| alphabet.contains(&c)) { - Some(word) - } else { - None - } - }) - .collect(); + let filtered_blocklist: HashSet = options + .blocklist + .iter() + .filter_map(|word| { + let word = word.to_lowercase(); + if word.len() >= 3 && word.chars().all(|c| alphabet.contains(&c)) { + Some(word) + } else { + None + } + }) + .collect(); - let mut sqids = Sqids { - alphabet, - min_length: options.min_length, - blocklist: filtered_blocklist, - }; + let mut sqids = + Sqids { alphabet, min_length: options.min_length, blocklist: filtered_blocklist }; - if options.min_length < sqids.min_value() as usize - || options.min_length > options.alphabet.len() - { - return Err(Error::MinLength { - min: sqids.min_value() as usize, - max: options.alphabet.len(), - }); - } + if options.min_length < sqids.min_value() as usize || + options.min_length > options.alphabet.len() + { + return Err(Error::MinLength { + min: sqids.min_value() as usize, + max: options.alphabet.len(), + }); + } - sqids.alphabet = sqids.shuffle(&sqids.alphabet); - Ok(sqids) - } + sqids.alphabet = sqids.shuffle(&sqids.alphabet); + Ok(sqids) + } - pub fn encode(&self, numbers: &[u64]) -> Result { - if numbers.is_empty() { - return Ok(String::new()); - } + pub fn encode(&self, numbers: &[u64]) -> Result { + if numbers.is_empty() { + return Ok(String::new()); + } - let in_range_numbers: Vec = numbers - .iter() - .copied() - .filter(|&n| n >= self.min_value() && n <= self.max_value()) - .collect(); + let in_range_numbers: Vec = numbers + .iter() + .copied() + .filter(|&n| n >= self.min_value() && n <= self.max_value()) + .collect(); - if in_range_numbers.len() != numbers.len() { - return Err(Error::EncodingRange { - min: self.min_value(), - max: self.max_value(), - }); - } + if in_range_numbers.len() != numbers.len() { + return Err(Error::EncodingRange { min: self.min_value(), max: self.max_value() }); + } - self.encode_numbers(&in_range_numbers, false) - } + self.encode_numbers(&in_range_numbers, false) + } - pub fn decode(&self, id: &str) -> Vec { - let mut ret = Vec::new(); + pub fn decode(&self, id: &str) -> Vec { + let mut ret = Vec::new(); - if id.is_empty() { - return ret; - } + if id.is_empty() { + return ret; + } - let alphabet_chars: HashSet = self.alphabet.iter().cloned().collect(); - if !id.chars().all(|c| alphabet_chars.contains(&c)) { - return ret; - } + let alphabet_chars: HashSet = self.alphabet.iter().cloned().collect(); + if !id.chars().all(|c| alphabet_chars.contains(&c)) { + return ret; + } - let prefix = id.chars().next().unwrap(); - let offset = self.alphabet.iter().position(|&c| c == prefix).unwrap(); - let mut alphabet: Vec = self - .alphabet - .iter() - .cycle() - .skip(offset) - .take(self.alphabet.len()) - .copied() - .collect(); + let prefix = id.chars().next().unwrap(); + let offset = self.alphabet.iter().position(|&c| c == prefix).unwrap(); + let mut alphabet: Vec = + self.alphabet.iter().cycle().skip(offset).take(self.alphabet.len()).copied().collect(); - let partition = alphabet[1]; + let partition = alphabet[1]; - alphabet.remove(1); - alphabet.remove(0); + alphabet.remove(1); + alphabet.remove(0); - let mut id = id[1..].to_string(); + let mut id = id[1..].to_string(); - let partition_index = id.find(partition); - if let Some(idx) = partition_index { - if idx > 0 && idx < id.len() - 1 { - id = id.split_off(idx + 1); - alphabet = self.shuffle(&alphabet); - } - } + let partition_index = id.find(partition); + if let Some(idx) = partition_index { + if idx > 0 && idx < id.len() - 1 { + id = id.split_off(idx + 1); + alphabet = self.shuffle(&alphabet); + } + } - while !id.is_empty() { - let separator = alphabet[alphabet.len() - 1]; - let chunks: Vec<&str> = id.split(separator).collect(); + while !id.is_empty() { + let separator = alphabet[alphabet.len() - 1]; + let chunks: Vec<&str> = id.split(separator).collect(); - if !chunks.is_empty() { - let alphabet_without_separator: Vec = - alphabet.iter().copied().take(alphabet.len() - 1).collect(); - let num = self.to_number(chunks[0], &alphabet_without_separator); - ret.push(num); + if !chunks.is_empty() { + let alphabet_without_separator: Vec = + alphabet.iter().copied().take(alphabet.len() - 1).collect(); + let num = self.to_number(chunks[0], &alphabet_without_separator); + ret.push(num); - if chunks.len() > 1 { - alphabet = self.shuffle(&alphabet); - } - } + if chunks.len() > 1 { + alphabet = self.shuffle(&alphabet); + } + } - id = chunks[1..].join(&separator.to_string()); - } + id = chunks[1..].join(&separator.to_string()); + } - ret - } + ret + } - pub fn min_value(&self) -> u64 { - 0 - } + pub fn min_value(&self) -> u64 { + 0 + } - pub fn max_value(&self) -> u64 { - u64::MAX - } + pub fn max_value(&self) -> u64 { + u64::MAX + } - fn encode_numbers(&self, numbers: &[u64], partitioned: bool) -> Result { - let offset = numbers - .iter() - .enumerate() - .fold(numbers.len(), |a, (i, &v)| { - self.alphabet[v as usize % self.alphabet.len()] as usize + i + a - }) - % self.alphabet.len(); + fn encode_numbers(&self, numbers: &[u64], partitioned: bool) -> Result { + let offset = numbers.iter().enumerate().fold(numbers.len(), |a, (i, &v)| { + self.alphabet[v as usize % self.alphabet.len()] as usize + i + a + }) % self.alphabet.len(); - let mut alphabet: Vec = self - .alphabet - .iter() - .cycle() - .skip(offset) - .take(self.alphabet.len()) - .copied() - .collect(); + let mut alphabet: Vec = + self.alphabet.iter().cycle().skip(offset).take(self.alphabet.len()).copied().collect(); - let prefix = alphabet[0]; - let partition = alphabet[1]; + let prefix = alphabet[0]; + let partition = alphabet[1]; - alphabet.remove(1); - alphabet.remove(0); + alphabet.remove(1); + alphabet.remove(0); - let mut ret: Vec = vec![prefix.to_string()]; + let mut ret: Vec = vec![prefix.to_string()]; - for (i, &num) in numbers.iter().enumerate() { - let alphabet_without_separator: Vec = - alphabet.iter().copied().take(alphabet.len() - 1).collect(); - ret.push(self.to_id(num, &alphabet_without_separator)); + for (i, &num) in numbers.iter().enumerate() { + let alphabet_without_separator: Vec = + alphabet.iter().copied().take(alphabet.len() - 1).collect(); + ret.push(self.to_id(num, &alphabet_without_separator)); - if i < numbers.len() - 1 { - let separator = alphabet[alphabet.len() - 1]; + if i < numbers.len() - 1 { + let separator = alphabet[alphabet.len() - 1]; - if partitioned && i == 0 { - ret.push(partition.to_string()); - } else { - ret.push(separator.to_string()); - } + if partitioned && i == 0 { + ret.push(partition.to_string()); + } else { + ret.push(separator.to_string()); + } - alphabet = self.shuffle(&alphabet); - } - } + alphabet = self.shuffle(&alphabet); + } + } - let mut id = ret.join(""); + let mut id = ret.join(""); - if self.min_length > id.len() { - if !partitioned { - let mut new_numbers = vec![0]; - new_numbers.extend_from_slice(numbers); - id = self.encode_numbers(&new_numbers, true)?; - } + if self.min_length > id.len() { + if !partitioned { + let mut new_numbers = vec![0]; + new_numbers.extend_from_slice(numbers); + id = self.encode_numbers(&new_numbers, true)?; + } - if self.min_length > id.len() { - let mut new_id = id.clone(); - let alphabet_slice = &alphabet[..(self.min_length - id.len())]; - new_id.push_str(&alphabet_slice.iter().collect::()); - new_id.push_str(&id[1..]); - id = new_id; - } - } + if self.min_length > id.len() { + let mut new_id = id.clone(); + let alphabet_slice = &alphabet[..(self.min_length - id.len())]; + new_id.push_str(&alphabet_slice.iter().collect::()); + new_id.push_str(&id[1..]); + id = new_id; + } + } - if self.is_blocked_id(&id) { - let mut new_numbers; + if self.is_blocked_id(&id) { + let mut new_numbers; - if partitioned { - if numbers[0] + 1 > self.max_value() { - return Err(Error::BlocklistOutOfRange); - } else { - new_numbers = numbers.to_vec(); - new_numbers[0] += 1; - } - } else { - new_numbers = vec![0]; - new_numbers.extend_from_slice(numbers); - } + if partitioned { + if numbers[0] + 1 > self.max_value() { + return Err(Error::BlocklistOutOfRange); + } else { + new_numbers = numbers.to_vec(); + new_numbers[0] += 1; + } + } else { + new_numbers = vec![0]; + new_numbers.extend_from_slice(numbers); + } - id = self.encode_numbers(&new_numbers, true)?; - } + id = self.encode_numbers(&new_numbers, true)?; + } - Ok(id) - } + Ok(id) + } - fn to_id(&self, num: u64, alphabet: &[char]) -> String { - let mut id = Vec::new(); - let mut result = num; + fn to_id(&self, num: u64, alphabet: &[char]) -> String { + let mut id = Vec::new(); + let mut result = num; - loop { - let idx = (result % alphabet.len() as u64) as usize; - id.insert(0, alphabet[idx]); - result /= alphabet.len() as u64; + loop { + let idx = (result % alphabet.len() as u64) as usize; + id.insert(0, alphabet[idx]); + result /= alphabet.len() as u64; - if result == 0 { - break; - } - } + if result == 0 { + break; + } + } - id.into_iter().collect() - } + id.into_iter().collect() + } - fn to_number(&self, id: &str, alphabet: &[char]) -> u64 { - let mut result = 0; + fn to_number(&self, id: &str, alphabet: &[char]) -> u64 { + let mut result = 0; - for c in id.chars() { - let idx = alphabet.iter().position(|&x| x == c).unwrap(); - result = result * alphabet.len() as u64 + idx as u64; - } + for c in id.chars() { + let idx = alphabet.iter().position(|&x| x == c).unwrap(); + result = result * alphabet.len() as u64 + idx as u64; + } - result - } + result + } - fn shuffle(&self, alphabet: &[char]) -> Vec { - let mut chars: Vec = alphabet.to_vec(); + fn shuffle(&self, alphabet: &[char]) -> Vec { + let mut chars: Vec = alphabet.to_vec(); - for i in 0..(chars.len() - 1) { - let j = chars.len() - 1 - i; - let r = (i as u32 * j as u32 + chars[i] as u32 + chars[j] as u32) % chars.len() as u32; - chars.swap(i, r as usize); - } + for i in 0..(chars.len() - 1) { + let j = chars.len() - 1 - i; + let r = (i as u32 * j as u32 + chars[i] as u32 + chars[j] as u32) % chars.len() as u32; + chars.swap(i, r as usize); + } - chars - } + chars + } - fn is_blocked_id(&self, id: &str) -> bool { - let id = id.to_lowercase(); + fn is_blocked_id(&self, id: &str) -> bool { + let id = id.to_lowercase(); - for word in &self.blocklist { - if word.len() <= id.len() { - if id.len() <= 3 || word.len() <= 3 { - if id == *word { - return true; - } - } else if word.chars().any(|c| c.is_ascii_digit()) { - if id.starts_with(word) || id.ends_with(word) { - return true; - } - } else if id.contains(word) { - return true; - } - } - } + for word in &self.blocklist { + if word.len() <= id.len() { + if id.len() <= 3 || word.len() <= 3 { + if id == *word { + return true; + } + } else if word.chars().any(|c| c.is_ascii_digit()) { + if id.starts_with(word) || id.ends_with(word) { + return true; + } + } else if id.contains(word) { + return true; + } + } + } - false - } + false + } } diff --git a/tests/encoding.rs b/tests/encoding.rs index 1a7cf1f..a00890b 100644 --- a/tests/encoding.rs +++ b/tests/encoding.rs @@ -2,10 +2,128 @@ use sqids::*; #[test] fn simple() { - let id = "8QRLaD".to_string(); - let numbers = vec![1, 2, 3]; + let sqids = Sqids::new(None).unwrap(); - let sqids = Sqids::new(None).unwrap(); - assert_eq!(sqids.encode(&numbers).unwrap(), id); - assert_eq!(sqids.decode(&id), numbers); + let numbers = vec![1, 2, 3]; + let id = "8QRLaD"; + + assert_eq!(sqids.encode(&numbers).unwrap(), id); + assert_eq!(sqids.decode(id), numbers); +} + +#[test] +fn different_inputs() { + let sqids = Sqids::new(None).unwrap(); + + let numbers = vec![0, 0, 0, 1, 2, 3, 100, 1_000, 100_000, 1_000_000, sqids.max_value()]; + + assert_eq!(sqids.decode(&sqids.encode(&numbers).unwrap()), numbers); +} + +#[test] +fn incremental_numbers() { + let sqids = Sqids::new(None).unwrap(); + + let ids = vec![ + ("bV", vec![0]), + ("U9", vec![1]), + ("g8", vec![2]), + ("Ez", vec![3]), + ("V8", vec![4]), + ("ul", vec![5]), + ("O3", vec![6]), + ("AF", vec![7]), + ("ph", vec![8]), + ("n8", vec![9]), + ]; + + for (id, numbers) in ids { + assert_eq!(sqids.encode(&numbers).unwrap(), id); + assert_eq!(sqids.decode(id), numbers); + } +} + +#[test] +fn incremental_numbers_same_index_0() { + let sqids = Sqids::new(None).unwrap(); + + let ids = vec![ + ("SrIu", vec![0, 0]), + ("nZqE", vec![0, 1]), + ("tJyf", vec![0, 2]), + ("e86S", vec![0, 3]), + ("rtC7", vec![0, 4]), + ("sQ8R", vec![0, 5]), + ("uz2n", vec![0, 6]), + ("7Td9", vec![0, 7]), + ("3nWE", vec![0, 8]), + ("mIxM", vec![0, 9]), + ]; + + for (id, numbers) in ids { + assert_eq!(sqids.encode(&numbers).unwrap(), id); + assert_eq!(sqids.decode(id), numbers); + } +} + +#[test] +fn incremental_numbers_same_index_1() { + let sqids = Sqids::new(None).unwrap(); + + let ids = vec![ + ("SrIu", vec![0, 0]), + ("nbqh", vec![1, 0]), + ("t4yj", vec![2, 0]), + ("eQ6L", vec![3, 0]), + ("r4Cc", vec![4, 0]), + ("sL82", vec![5, 0]), + ("uo2f", vec![6, 0]), + ("7Zdq", vec![7, 0]), + ("36Wf", vec![8, 0]), + ("m4xT", vec![9, 0]), + ]; + + for (id, numbers) in ids { + assert_eq!(sqids.encode(&numbers).unwrap(), id); + assert_eq!(sqids.decode(id), numbers); + } +} + +#[test] +fn multi_input() { + let sqids = Sqids::new(None).unwrap(); + + let numbers: Vec = (0..100).collect(); + let output = sqids.decode(&sqids.encode(&numbers).unwrap()); + + assert_eq!(numbers, output); +} + +#[test] +fn encoding_no_numbers() { + let sqids = Sqids::new(None).unwrap(); + assert_eq!(sqids.encode(&[]).unwrap(), ""); +} + +#[test] +fn decoding_empty_string() { + let sqids = Sqids::new(None).unwrap(); + let numbers: Vec = vec![]; + assert_eq!(sqids.decode(""), numbers); +} + +#[test] +fn decoding_invalid_character() { + let sqids = Sqids::new(None).unwrap(); + let numbers: Vec = vec![]; + assert_eq!(sqids.decode("*"), numbers); +} + +#[test] +#[should_panic] +fn encode_out_of_range_numbers() { + let sqids = Sqids::new(None).unwrap(); + + assert!(sqids.encode(&[sqids.min_value() - 1]).is_err()); + assert!(sqids.encode(&[sqids.max_value() + 1]).is_err()); }