Merge pull request #43 from sts10/both-schlinkert
Perform Schlinkert pruning both forwards and in reverse, picking whichever direction saves more words
sts10 authored May 1, 2023
2 parents d411f6a + 9f52367 commit 8bff8d5
Showing 9 changed files with 107 additions and 23 deletions.
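
The heart of the change is easiest to see in miniature. Below is a minimal sketch (mine, not code from this commit) of the strategy the new `schlinkert_prune` uses: run a one-way pruning pass on the list as given and again on the list with every word reversed, then keep whichever direction removes fewer words. Here `prune_one_way` is a hypothetical stand-in for the crate's Sardinas-Patterson offender search, and the sketch reverses by `char` for simplicity, whereas the real code below reverses by grapheme cluster via `unicode-segmentation`.

```rust
// Sketch only: `prune_one_way` is a hypothetical stand-in for the real
// Sardinas-Patterson offender search in src/list_manipulations.rs.
fn prune_both_ways(
    list: &[String],
    prune_one_way: impl Fn(&[String]) -> Vec<String>, // returns words to remove
) -> Vec<String> {
    let reverse = |ws: &[String]| -> Vec<String> {
        ws.iter().map(|w| w.chars().rev().collect()).collect()
    };
    let forwards = prune_one_way(list);
    // Reverse the list, find offenders, then un-reverse the offenders so
    // they can be matched against the original words.
    let backwards = reverse(&prune_one_way(&reverse(list)));
    // Keep the direction that removes fewer words; ties go to forwards.
    let to_remove = if forwards.len() <= backwards.len() {
        forwards
    } else {
        backwards
    };
    list.iter()
        .filter(|w| !to_remove.contains(*w))
        .cloned()
        .collect()
}

fn main() {
    let list: Vec<String> = ["news", "paper", "newspaper"]
        .iter()
        .map(|w| w.to_string())
        .collect();
    // Demo pruner: flag any word that is a proper prefix of another word.
    let prefix_pruner = |ws: &[String]| -> Vec<String> {
        ws.iter()
            .filter(|a| ws.iter().any(|b| b != *a && b.starts_with(a.as_str())))
            .cloned()
            .collect()
    };
    // Forwards flags "news"; backwards flags "paper"; the tie goes forwards.
    assert_eq!(prune_both_ways(&list, prefix_pruner), ["paper", "newspaper"]);
}
```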
2 changes: 1 addition & 1 deletion Cargo.lock

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "tidy"
version = "0.2.88"
version = "0.2.90"
authors = ["sts10 <[email protected]>"]
license = "MIT"
edition = "2021"
6 changes: 4 additions & 2 deletions readme.markdown
@@ -371,9 +371,11 @@ I occasionally [build releases](https://github.com/sts10/tidy/releases) of Tidy

## For Tidy developers

Run all code tests: `cargo test`
* Run all code tests: `cargo test`
* Generate docs: `cargo doc --document-private-items --no-deps`. Add `--open` flag to open docs after generation. Locally, docs are printed to `./target/doc/tidy/index.html`.
* Check license compatibility of Tidy's dependencies: `cargo deny check licenses` (requires that you [have cargo-deny installed locally](https://github.com/EmbarkStudios/cargo-deny#install-cargo-deny))

Generate docs: `cargo doc --document-private-items --no-deps`. Add `--open` flag to open docs after generation. Locally, docs are printed to `./target/doc/tidy/index.html`.
Pull Requests welcome!

## Blog posts related to this project

7 changes: 5 additions & 2 deletions src/display_information/mod.rs
@@ -2,7 +2,7 @@
pub mod uniquely_decodable;
use crate::count_characters;
use crate::display_information::uniquely_decodable::check_decodability;
use crate::display_information::uniquely_decodable::is_uniquely_decodable;
use crate::parse_delimiter;
use crate::split_and_vectorize;

@@ -78,7 +78,10 @@ pub fn display_list_information(

// At least for now, this one is EXPENSIVE
if level >= 4 {
eprintln!("Uniquely decodable? : {}", check_decodability(&list));
eprintln!(
"Uniquely decodable? : {}",
is_uniquely_decodable(&list)
);
}

let entropy_per_word = calc_entropy_per_word(list.len());
7 changes: 3 additions & 4 deletions src/display_information/uniquely_decodable.rs
@@ -6,10 +6,9 @@
/// very closely.
use std::collections::HashSet;

pub fn check_decodability(c: &[String]) -> bool {
// Right off the bat, convert inputted Slice to a HashSet
// Since we always want this list to be unique, and we're
// going to eventually calculate a disjoint boolean!
/// Return true if the list is uniquely decodable, false if not. I
/// don't _think_ we need to check reversed words in this case.
pub fn is_uniquely_decodable(c: &[String]) -> bool {
let c = vec_to_hash(c);
sardinas_patterson_theorem(c)
}
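
The diff above shows only the renamed entry point; `vec_to_hash` and `sardinas_patterson_theorem` stay out of view. As an orientation aid, here is a compact standalone sketch of the Sardinas-Patterson check, reconstructed from the algorithm's standard statement rather than from this crate's internals: a code is uniquely decodable if and only if no code word ever appears among the "dangling suffixes" the code generates. The test vectors are the binary codes from `tests/uniquely_decodable_tests.rs` below.

```rust
use std::collections::HashSet;

/// All nonempty remainders w such that some u in `a` and some longer
/// v in `b` satisfy v = u + w (that is, u is a proper prefix of v).
fn dangling_suffixes(a: &HashSet<String>, b: &HashSet<String>) -> HashSet<String> {
    let mut out = HashSet::new();
    for u in a {
        for v in b {
            if v.len() > u.len() && v.starts_with(u.as_str()) {
                out.insert(v[u.len()..].to_string());
            }
        }
    }
    out
}

/// Sardinas-Patterson: a code is uniquely decodable iff no round of
/// dangling suffixes ever contains a code word. Every dangling suffix
/// is a suffix of some code word, so the worklist below is finite.
fn is_uniquely_decodable_sketch(code: &[String]) -> bool {
    let c: HashSet<String> = code.iter().cloned().collect();
    let mut seen: HashSet<String> = HashSet::new();
    let mut frontier = dangling_suffixes(&c, &c); // C_1
    while !frontier.is_empty() {
        if frontier.iter().any(|w| c.contains(w)) {
            return false; // a code word turned up as a dangling suffix
        }
        seen.extend(frontier.iter().cloned());
        // C_{n+1} compares the code against C_n in both directions.
        let next: HashSet<String> = dangling_suffixes(&c, &frontier)
            .union(&dangling_suffixes(&frontier, &c))
            .cloned()
            .collect();
        frontier = next.difference(&seen).cloned().collect();
    }
    true
}

fn main() {
    let to_strings =
        |ws: &[&str]| -> Vec<String> { ws.iter().map(|w| w.to_string()).collect() };
    assert!(!is_uniquely_decodable_sketch(&to_strings(&["0", "10", "010", "101"])));
    assert!(is_uniquely_decodable_sketch(&to_strings(&["0", "01", "011", "0111"])));
}
```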
36 changes: 33 additions & 3 deletions src/list_manipulations.rs
@@ -88,7 +88,7 @@ pub fn delete_before_first_char(s: &str, ch: char) -> &str {
/// [a separate repo](https://github.com/sts10/splitter/blob/main/src/lib.rs).
pub fn delete_after_first_char(s: &str, ch: char) -> &str {
match memchr(ch as u8, s.as_bytes()) {
None => s, // not found => return the whole string
None => s, // delimiting character not found in string s, so return the whole string
Some(pos) => &s[0..pos],
}
}
@@ -141,13 +141,43 @@ pub fn guarantee_maximum_prefix_length(
/// Executes Schlinkert prune. Attempts to make list uniquely decodable
/// by removing the fewest number of code words possible. Adapted from
/// Sardinas-Patterson algorithm.
/// Runs the word list both as given and with each word reversed, preferring
/// whichever direction preserves more words from the given list.
pub fn schlinkert_prune(list: &[String]) -> Vec<String> {
let offenders_to_remove = get_sardinas_patterson_final_intersection(list);
// Clumsily clone the list into a new variable.
let mut new_list = list.to_owned();
new_list.retain(|x| !offenders_to_remove.contains(x));
// First, simply find the "offenders" with the list as given.
let offenders_to_remove_forwards = get_sardinas_patterson_final_intersection(list);
// Now, reverse all words before running the Schlinkert prune.
// This will give a different list of offending words -- and potentially FEWER
// than running the prune forwards. (We call the reverse_all_words function
// twice because we have to un-reverse all the offending words at the end.)
let offenders_to_remove_backwards = reverse_all_words(
&get_sardinas_patterson_final_intersection(&reverse_all_words(list)),
);
// If running the prune on the reversed words yielded fewer offenders,
// we'll remove those offending words, since our goal is to remove
// as few words as possible.
if offenders_to_remove_forwards.len() <= offenders_to_remove_backwards.len() {
new_list.retain(|x| !offenders_to_remove_forwards.contains(x));
} else {
new_list.retain(|x| !offenders_to_remove_backwards.contains(x));
}
new_list
}

/// Reverse all words on the given list. For example,
/// `["hotdog", "hamburger", "alligator"]` becomes
/// `["godtoh", "regrubmah", "rotagilla"]`
/// Uses graphemes to ensure it handles accented characters correctly.
pub fn reverse_all_words(list: &[String]) -> Vec<String> {
let mut reversed_list = vec![];
for word in list {
reversed_list.push(word.graphemes(true).rev().collect::<String>());
}
reversed_list
}

use unicode_segmentation::UnicodeSegmentation;
/// Given a word and a `usize` of `length`, this function returns
/// the first `length` characters of that word. This length is
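
As an aside, here is a tiny standalone example (not part of the commit) of why `reverse_all_words` reverses by grapheme cluster rather than by `char`: with a combining accent, `chars().rev()` detaches the mark from its base letter, while `graphemes(true).rev()` keeps them together. It assumes only the `unicode-segmentation` crate, which this file already imports.

```rust
use unicode_segmentation::UnicodeSegmentation;

fn main() {
    // "é" written as 'e' followed by U+0301 (combining acute accent).
    let word = "cafe\u{301}";

    // Reversing by char moves the combining accent to the front, where it
    // no longer follows the letter it modified: "\u{301}efac".
    let by_char: String = word.chars().rev().collect();

    // Reversing by grapheme cluster keeps "e\u{301}" intact: "e\u{301}fac".
    let by_grapheme: String = word.graphemes(true).rev().collect();

    assert_ne!(by_char, by_grapheme);
    assert_eq!(by_grapheme, "e\u{301}fac");
}
```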
13 changes: 13 additions & 0 deletions tests/list_manipulation_tests.rs
@@ -1,5 +1,6 @@
mod list_manipulation_tests {
use tidy::dice::print_as_dice; // not exactly sure why I need this here...
use tidy::list_manipulations::reverse_all_words;
use tidy::*;

fn make_lists() -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) {
@@ -660,6 +661,18 @@ mod list_manipulation_tests {
);
}

#[test]
fn can_reverse_list() {
let list = vec![
"hotdog".to_string(),
"hamburger".to_string(),
"alligator".to_string(),
"😀😁😆".to_string(),
];
let rev_list = reverse_all_words(&list);
assert_eq!(rev_list, ["godtoh", "regrubmah", "rotagilla", "😆😁😀"]);
}

#[test]
fn can_print_dice_rolls_of_base_6() {
assert_eq!(print_as_dice(0, 6, 7776, false), "11111".to_string());
37 changes: 37 additions & 0 deletions tests/pruning_tests.rs
@@ -0,0 +1,37 @@
mod pruning_tests {
use tidy::display_information::uniquely_decodable::is_uniquely_decodable;
use tidy::*;

#[test]
fn can_run_schlinkert_prune_on_reversed_list_if_it_saves_more_words() {
let list: Vec<String> = vec![
"news",
"paper",
"newspaper",
"donkey",
"newsdonkey",
"ghost",
"newsghost",
"radish",
"newsradish",
]
.iter()
.map(|w| w.to_string())
.collect();

let this_tidy_request = TidyRequest {
list: list,
should_schlinkert_prune: true,
..Default::default()
};
let new_list = tidy_list(this_tidy_request);
// If the Schlinkert prune were run only forwards,
// just 5 words would be saved. But pruning the
// reversed list saves 8 words: only "news" is removed.
assert!(new_list.len() == 8);
// And now let's confirm that the new list is indeed
// uniquely decodable, at least as far as Tidy is able
// to confirm.
assert!(is_uniquely_decodable(&new_list));
}
}
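
For readers who want to poke at the prune outside the `TidyRequest` pipeline, here is a minimal direct call, assuming `schlinkert_prune` is exported from `tidy::list_manipulations` as the test imports above suggest. On this three-word list either direction removes exactly one word, so two survive:

```rust
use tidy::list_manipulations::schlinkert_prune;

fn main() {
    let list: Vec<String> = ["news", "paper", "newspaper"]
        .iter()
        .map(|w| w.to_string())
        .collect();
    // "newspaper" reads as one code word or as "news" + "paper", so this
    // list is not uniquely decodable until one of the three words goes.
    let pruned = schlinkert_prune(&list);
    assert_eq!(pruned.len(), 2);
}
```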
20 changes: 10 additions & 10 deletions tests/uniquely_decodable_tests.rs
@@ -1,5 +1,5 @@
mod uniquely_decodable_tests {
use tidy::display_information::uniquely_decodable::check_decodability;
use tidy::display_information::uniquely_decodable::is_uniquely_decodable;

#[test]
fn can_determine_a_list_with_prefix_words_is_not_uniquely_decodable() {
@@ -8,7 +8,7 @@ mod uniquely_decodable_tests {
.map(|x| x.to_string())
.collect();

assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));

let list2: Vec<String> = vec![
"spill".to_string(),
@@ -17,7 +17,7 @@
"spills".to_string(),
"unmoved".to_string(),
];
assert!(!check_decodability(&list2));
assert!(!is_uniquely_decodable(&list2));
}

#[test]
@@ -39,7 +39,7 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));
}

#[test]
@@ -48,7 +48,7 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));
}

#[test]
@@ -57,26 +57,26 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));

let list: Vec<String> = vec!["0", "10", "010", "101"]
.iter()
.map(|w| w.to_string())
.collect();
assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));

let list: Vec<String> = vec!["0", "01", "011", "0111"]
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));

// '0, 1, 00, 11' is not a uniquely decodable code
let list: Vec<String> = vec!["0", "1", "00", "11"]
.iter()
.map(|w| w.to_string())
.collect();
assert!(!check_decodability(&list));
assert!(!is_uniquely_decodable(&list));
}

#[test]
@@ -87,6 +87,6 @@
.iter()
.map(|w| w.to_string())
.collect();
assert!(check_decodability(&list));
assert!(is_uniquely_decodable(&list));
}
}
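
A brute-force cross-check of the binary examples above (my sketch, independent of the Sardinas-Patterson machinery): count how many ways a string can be split into code words. A code is ambiguous exactly when some string has more than one parse; this recursion is exponential in general but fine for these tiny codes.

```rust
/// Count the ways `s` can be segmented into words drawn from `code`.
fn parse_count(s: &str, code: &[&str]) -> usize {
    if s.is_empty() {
        return 1; // one way to parse the empty string: take nothing
    }
    code.iter()
        .filter(|w| s.starts_with(**w))
        .map(|w| parse_count(&s[w.len()..], code))
        .sum()
}

fn main() {
    // "010" parses as ["010"] or ["0", "10"]: ambiguous, matching the
    // negative assertion for this code in the tests above.
    assert_eq!(parse_count("010", &["0", "10", "010", "101"]), 2);
    // Every code word here starts with "0" and contains no other "0", so
    // each "0" marks a boundary and every string parses at most one way.
    assert_eq!(parse_count("0010111", &["0", "01", "011", "0111"]), 1);
}
```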
