First draft of optimisation

This commit is contained in:
Hiers 2025-01-27 17:18:33 +00:00
parent 6598a9407e
commit c8100a4257
2 changed files with 128 additions and 113 deletions

View file

@ -6,143 +6,150 @@ use std::{
use kradical_parsing::radk; use kradical_parsing::radk;
pub fn search_by_radical(query: &mut String, radk_list: &[radk::Membership], stroke_info: &[HashSet<char>]) -> Option<()> { pub fn search_by_radical<'a>(query: &mut String, radk_list: *const Vec<radk::Membership>, stroke_info: &[HashSet<char>], result: &mut HashSet<&'a String>, aux: &mut HashSet<&'a String>, vec: &mut Vec<Vec<&'a String>>) -> Option<()> {
let mut result: HashSet<_> = HashSet::new(); //let mut result: HashSet<_> = HashSet::new();
let mut aux: HashSet<_> = HashSet::new(); //let mut aux: HashSet<_> = HashSet::new();
unsafe {
if !radk_list.is_empty() && !stroke_info.is_empty() { if !(*radk_list).is_empty() && !stroke_info.is_empty() {
result.clear(); result.clear();
for i in 0..30 {
/* First iteration: get the baseline for the results */ vec[i].clear();
let mut rad = query.chars().nth(1).unwrap();
if rad == '*' || rad == '' {
/* if search_by_strokes failed, then something is very wrong */
rad = search_by_strokes(query, radk_list, 1)?;
}
for k in radk_list.iter() {
if k.radical.glyph.contains(rad) {
for input in &k.kanji {
result.insert(input);
}
break;
} }
}
/* Iterate until you've exhausted user input: refine the baseline to get final output */ /* First iteration: get the baseline for the results */
for (i, mut rad) in query.clone().chars().skip(2).enumerate() { let mut rad = query.chars().nth(1).unwrap();
if rad == '*' || rad == '' { if rad == '*' || rad == '' {
/* if search_by_strokes failed, then something is very wrong */ /* if search_by_strokes failed, then something is very wrong */
rad = search_by_strokes(query, radk_list, i+2)?; rad = search_by_strokes(query, radk_list, 1)?;
} }
for k in radk_list.iter() { for k in (*radk_list).iter() {
if k.radical.glyph.contains(rad) { if k.radical.glyph.contains(rad) {
for input in &k.kanji { for input in &k.kanji {
aux.insert(input); result.insert(input);
} }
result = &result & &aux;
aux.clear();
break; break;
} }
} }
}
/* Hash sets are unordered; Will now order the results by number of strokes */ /* Iterate until you've exhausted user input: refine the baseline to get final output */
let mut vec: Vec<Vec<&String>> = Vec::with_capacity(30); /* The kanji we care about will have at most 30 strokes */ for (i, mut rad) in query.clone().chars().skip(2).enumerate() {
for _i in 0..29 { if rad == '*' || rad == '' {
vec.push(Vec::new()); /* if search_by_strokes failed, then something is very wrong */
} rad = search_by_strokes(query, radk_list, i+2)?;
}
/* for k in (*radk_list).iter() {
* A vector of vectors is useful here to store kanji by number of strokes if k.radical.glyph.contains(rad) {
* First vector's index will indicate the number of strokes (minus 1 because it starts at 0) for input in &k.kanji {
* Second vector will hold all of the kanji that is written in that number of strokes aux.insert(input);
*/ }
for r in &result { *result = &*result & &*aux;
for (i, s) in stroke_info.iter().enumerate() { aux.clear();
if s.contains(&(r.chars().next().unwrap())) { /* r is a String that has just one character */ break;
vec[i].push(r); }
break;
} }
} }
}
for (i, v) in vec.iter().enumerate() { /* Hash sets are unordered; Will now order the results by number of strokes */
if !v.is_empty() { //let mut vec: Vec<Vec<&String>> = Vec::with_capacity(30); /* The kanji we care about will have at most 30 strokes */
let ii = i + 1; //for _i in 0..30 {
print!("\x1b[90m{:02} -\x1b[m", ii); // vec.push(Vec::new());
for l in v { //}
print!(" {l}");
/*
* A vector of vectors is useful here to store kanji by number of strokes
* First vector's index will indicate the number of strokes (minus 1 because it starts at 0)
* Second vector will hold all of the kanji that is written in that number of strokes
*/
for r in result.iter() {
for (i, s) in stroke_info.iter().enumerate() {
if s.contains(&(r.chars().next().unwrap())) { /* r is a String that has just one character */
vec[i].push(r);
break;
}
} }
println!();
} }
for (i, v) in vec.iter().enumerate() {
if !v.is_empty() {
let ii = i + 1;
print!("\x1b[90m{:02} -\x1b[m", ii);
for l in v {
print!(" {l}");
}
println!();
}
}
} else if (*radk_list).is_empty() {
eprintln!("Error while reading radkfile\nIf you don't have the radkfile, download it from\n\
https://www.edrdg.org/krad/kradinf.html and place it in \"~/.local/share/\" on Linux or \"~\\AppData\\Local\\\" on Windows.\n\
This file is needed to search radicals by strokes.");
} else {
eprintln!("File \"/usr/local/share/ykdt/kanji_strokes\" is missing!");
} }
} else if radk_list.is_empty() {
eprintln!("Error while reading radkfile\nIf you don't have the radkfile, download it from\n\
https://www.edrdg.org/krad/kradinf.html and place it in \"~/.local/share/\" on Linux or \"~\\AppData\\Local\\\" on Windows.\n\
This file is needed to search radicals by strokes.");
} else {
eprintln!("File \"/usr/local/share/ykdt/kanji_strokes\" is missing!");
} }
Some(()) Some(())
} }
fn search_by_strokes(query: &mut String, radk_list: &[radk::Membership], n: usize) -> Option<char> { fn search_by_strokes(query: &mut String, radk_list: *const Vec<radk::Membership>, n: usize) -> Option<char> {
let mut strokes = String::new(); let mut strokes = String::new();
let mut radicals: Vec<char> = Vec::new(); let mut radicals: Vec<char> = Vec::new();
let rad; let rad;
loop{
print!("How many strokes does your radical have? ");
stdout().flush().ok()?;
strokes.clear();
if stdin().read_line(&mut strokes).ok()? == 0{
std::process::exit(0);
}
match strokes.trim().parse::<u8>() { unsafe {
Ok(strk) => { loop {
let mut i = 1; print!("How many strokes does your radical have? ");
for k in radk_list.iter() { stdout().flush().ok()?;
if k.radical.strokes == strk { strokes.clear();
print!("{}{} ", i, k.radical.glyph); if stdin().read_line(&mut strokes).ok()? == 0{
radicals.push(k.radical.glyph.chars().next()?); std::process::exit(0);
i += 1; }
} else if k.radical.strokes > strk {
println!();
break;
}
}
loop {
print!("Choose the radical to use for your search: ");
stdout().flush().ok()?;
strokes.clear();
if stdin().read_line(&mut strokes).ok()? == 0{
std::process::exit(0);
}
match strokes.trim().parse::<usize>() { match strokes.trim().parse::<u8>() {
Ok(strk) => { Ok(strk) => {
if strk < 1 || strk > i-1 { let mut i = 1;
eprintln!("Couldn't parse input: number not in range"); for k in (*radk_list).iter() {
} else { if k.radical.strokes == strk {
rad = radicals.get(strk-1)?; print!("{}{} ", i, k.radical.glyph);
/* UTF-8 is not fun */ radicals.push(k.radical.glyph.chars().next()?);
let char_and_index = query.char_indices().nth(n)?; i += 1;
query.replace_range(char_and_index.0.. } else if k.radical.strokes > strk {
char_and_index.0 + println!();
char_and_index.1.len_utf8(), break;
rad.to_string().as_str()); }
println!("\x1b[90m{}\x1b[m", query);
return Some(*rad);
}
},
Err(e) => { eprintln!("{e}"); }
} }
} loop {
}, print!("Choose the radical to use for your search: ");
Err(e) => { eprintln!("{e}") } stdout().flush().ok()?;
strokes.clear();
if stdin().read_line(&mut strokes).ok()? == 0{
std::process::exit(0);
}
match strokes.trim().parse::<usize>() {
Ok(strk) => {
if strk < 1 || strk > i-1 {
eprintln!("Couldn't parse input: number not in range");
} else {
rad = radicals.get(strk-1)?;
/* UTF-8 is not fun */
let char_and_index = query.char_indices().nth(n)?;
query.replace_range(char_and_index.0..
char_and_index.0 +
char_and_index.1.len_utf8(),
rad.to_string().as_str());
println!("\x1b[90m{}\x1b[m", query);
return Some(*rad);
}
},
Err(e) => { eprintln!("{e}"); }
}
}
},
Err(e) => { eprintln!("{e}") }
}
} }
} }
} }

View file

@ -6,7 +6,9 @@ use std::{
io::{stdin, stdout, Write, IsTerminal}, io::{stdin, stdout, Write, IsTerminal},
path::PathBuf, path::PathBuf,
process::{Command, Stdio}, process::{Command, Stdio},
cell::RefCell,
env, env,
collections::HashSet,
}; };
use word_search::word_search; use word_search::word_search;
@ -42,10 +44,18 @@ fn main() -> Result<(), ureq::Error> {
}; };
let path = get_radkfile_path().unwrap(); let path = get_radkfile_path().unwrap();
let mut radk_list = Vec::new(); //let mut radk_list = Vec::new();
let radk_list = RefCell::new(Vec::new());
let mut stroke_info = Vec::new(); let mut stroke_info = Vec::new();
let mut try_load = true; let mut try_load = true;
let mut result: HashSet<&String> = HashSet::new();
let mut aux: HashSet<&String> = HashSet::new();
let mut vec: Vec<Vec<&String>> = Vec::with_capacity(30); /* The kanji we care about will have at most 30 strokes */
for _i in 0..30 {
vec.push(Vec::new());
}
let options = parse_args(); let options = parse_args();
@ -74,10 +84,9 @@ fn main() -> Result<(), ureq::Error> {
if query.starts_with(':') || query.starts_with('') { /* Kanji search */ if query.starts_with(':') || query.starts_with('') { /* Kanji search */
if try_load { if try_load {
radk_list = { match radk::parse_file(&path) { match radk::parse_file(&path) {
Ok(radk_list) => radk_list, Ok(list) => { radk_list.replace(list); },
Err(_e) => radk_list, Err(_e) => ()
}
}; };
stroke_info = { match get_stroke_info() { stroke_info = { match get_stroke_info() {
Ok(stroke_info) => stroke_info, Ok(stroke_info) => stroke_info,
@ -87,10 +96,9 @@ fn main() -> Result<(), ureq::Error> {
try_load = false; try_load = false;
} }
/* if search_by_radical failed, then something is very wrong */ /* if search_by_radical failed, then something is very wrong */
if search_by_radical(&mut query, &radk_list, &stroke_info).is_none() { if search_by_radical(&mut query, radk_list.as_ptr(), &stroke_info, &mut result, &mut aux, &mut vec).is_none() {
eprintln!("Couldn't parse input"); eprintln!("Couldn't parse input");
} }
} else if query.starts_with('_') || query.starts_with('_') { /* Sentence search */ } else if query.starts_with('_') || query.starts_with('_') { /* Sentence search */
let bytes = query.chars().next().unwrap().len_utf8(); let bytes = query.chars().next().unwrap().len_utf8();