Split out the kanji search-by-radical functions.
Move them to a separate file in preparation for adding sentence searching, which will add considerable complexity to the code.
This commit is contained in:
parent
683610bf2c
commit
93576a3e34
2 changed files with 165 additions and 156 deletions
161
src/kanji_search.rs
Normal file
161
src/kanji_search.rs
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
use std::{
|
||||||
|
io::{stdin, stdout, Write},
|
||||||
|
path::PathBuf,
|
||||||
|
collections::HashSet,
|
||||||
|
};
|
||||||
|
|
||||||
|
use colored::*;
|
||||||
|
use kradical_parsing::radk;
|
||||||
|
|
||||||
|
pub fn search_by_radical(mut query: &mut String){
|
||||||
|
let mut result: HashSet<_> = HashSet::new();
|
||||||
|
let mut aux: HashSet<_> = HashSet::new();
|
||||||
|
let path = get_radkfile_path();
|
||||||
|
|
||||||
|
match radk::parse_file(path.unwrap()) { /* if it doesn't exist, just panic */
|
||||||
|
Ok(radk_list) => {
|
||||||
|
result.clear();
|
||||||
|
|
||||||
|
/* First iteration: get the baseline for the results */
|
||||||
|
let mut rad = query.chars().nth(1).unwrap();
|
||||||
|
if rad == '*' || rad == '*' {
|
||||||
|
/* if search_by_radical returned an error then something is very wrong */
|
||||||
|
rad = search_by_strokes(&mut query, &radk_list, 1).expect("Couldn't parse input");
|
||||||
|
}
|
||||||
|
|
||||||
|
for k in radk_list.iter() {
|
||||||
|
if k.radical.glyph.contains(rad) {
|
||||||
|
for input in &k.kanji {
|
||||||
|
result.insert(input);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Iterate until you've exhausted user input: refine the baseline to get final output */
|
||||||
|
for (i, mut rad) in query.clone().chars().skip(2).enumerate() {
|
||||||
|
if rad == '*' || rad == '*' {
|
||||||
|
/* if search_by_radical returned an error then something is very wrong */
|
||||||
|
rad = search_by_strokes(&mut query, &radk_list, i+2).expect("Couldn't parse input");
|
||||||
|
}
|
||||||
|
|
||||||
|
for k in radk_list.iter() {
|
||||||
|
if k.radical.glyph.contains(rad) {
|
||||||
|
for input in &k.kanji {
|
||||||
|
aux.insert(input);
|
||||||
|
}
|
||||||
|
result = &result & &aux;
|
||||||
|
aux.clear();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for r in result {
|
||||||
|
print!("{r} ");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
Err(_e) => eprintln!("Error while reading radkfile\nIf you don't have the radkfile, download it from \
|
||||||
|
https://www.edrdg.org/krad/kradinf.html and place it in \"~/.local/share/\" on Linux or \"~\\AppData\\Local\\\" on Windows. \
|
||||||
|
This file is needed to search radicals by strokes."),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the expected location of the radkfile on Unix-like systems:
/// `.local/share/radkfile` inside the user's home directory, or `None` when
/// the home directory cannot be determined.
#[cfg(unix)]
fn get_radkfile_path() -> Option<PathBuf> {
    /* home_dir() is only flagged because of its Windows behaviour, which is irrelevant here */
    #[allow(deprecated)]
    let home = std::env::home_dir()?;
    Some(home.join(".local/share/radkfile"))
}
|
||||||
|
|
||||||
|
/* Nicked this section straight from https://github.com/rust-lang/cargo/blob/master/crates/home/src/windows.rs */
#[cfg(windows)]
extern "C" {
    /* Length, in u16 units, of a NUL-terminated wide string (terminator excluded) */
    fn wcslen(buf: *const u16) -> usize;
}

/// Returns the expected location of the radkfile on Windows:
/// `Appdata\Local\radkfile` inside the user's profile directory.
///
/// The profile directory is taken from the `USERPROFILE` environment variable
/// when it is set and non-empty; otherwise the shell API is queried.
/// Returns `None` when neither source yields a directory.
#[cfg(windows)]
fn get_radkfile_path() -> Option<PathBuf> {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;
    use windows_sys::Win32::Foundation::{MAX_PATH, S_OK};
    use windows_sys::Win32::UI::Shell::{SHGetFolderPathW, CSIDL_PROFILE};

    /* `env` is not imported in this module, so the full path is spelled out */
    match std::env::var_os("USERPROFILE").filter(|s| !s.is_empty()).map(PathBuf::from) {
        Some(path) => {
            Some(path.join("Appdata\\Local\\radkfile"))
        },
        None => {
            /* SAFETY: the buffer has room for MAX_PATH wide characters, which is the
             * size SHGetFolderPathW expects. When it returns S_OK the buffer holds a
             * NUL-terminated string, so wcslen stops inside the allocation and
             * set_len(len) only exposes elements the call has written. */
            unsafe {
                let mut path: Vec<u16> = Vec::with_capacity(MAX_PATH as usize);
                match SHGetFolderPathW(0, CSIDL_PROFILE as i32, 0, 0, path.as_mut_ptr()) {
                    S_OK => {
                        let len = wcslen(path.as_ptr());
                        path.set_len(len);
                        let s = OsString::from_wide(&path);
                        Some(PathBuf::from(s).join("Appdata\\Local\\radkfile"))
                    }
                    _ => None,
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
fn search_by_strokes(query: &mut String, radk_list: &[radk::Membership], n: usize) -> Result<char, std::io::Error> {
|
||||||
|
|
||||||
|
let mut strokes = String::new();
|
||||||
|
let mut radicals: Vec<char> = Vec::new();
|
||||||
|
let rad;
|
||||||
|
loop{
|
||||||
|
print!("How many strokes does your radical have? ");
|
||||||
|
stdout().flush()?;
|
||||||
|
strokes.clear();
|
||||||
|
if (stdin().read_line(&mut strokes).expect("Can't read from stdin")) == 0 {
|
||||||
|
std::process::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
match strokes.trim().parse::<u8>() {
|
||||||
|
Ok(strk) => {
|
||||||
|
let mut i = 1;
|
||||||
|
for k in radk_list.iter() {
|
||||||
|
if k.radical.strokes == strk {
|
||||||
|
print!("{}{} ", i, k.radical.glyph);
|
||||||
|
radicals.push(k.radical.glyph.chars().next().unwrap());
|
||||||
|
i += 1;
|
||||||
|
} else if k.radical.strokes > strk {
|
||||||
|
println!();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
loop {
|
||||||
|
print!("Choose the radical to use for your search: ");
|
||||||
|
stdout().flush()?;
|
||||||
|
strokes.clear();
|
||||||
|
if (stdin().read_line(&mut strokes).expect("Can't read from stdin")) == 0 {
|
||||||
|
std::process::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
match strokes.trim().parse::<usize>() {
|
||||||
|
Ok(strk) => {
|
||||||
|
if strk < 1 || strk > i-1 {
|
||||||
|
eprintln!("Couldn't parse input: number not in range");
|
||||||
|
} else {
|
||||||
|
rad = radicals.get(strk-1).unwrap();
|
||||||
|
/* UTF-8 is not fun */
|
||||||
|
let char_and_index = query.char_indices().nth(n).unwrap();
|
||||||
|
query.replace_range(char_and_index.0..
|
||||||
|
char_and_index.0 +
|
||||||
|
char_and_index.1.len_utf8(),
|
||||||
|
rad.to_string().as_str());
|
||||||
|
println!("{}", query.as_str().bright_black());
|
||||||
|
return Ok(*rad);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => { eprintln!("{e}"); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => { eprintln!("{e}") }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
160
src/main.rs
160
src/main.rs
|
@ -1,17 +1,17 @@
|
||||||
|
mod kanji_search;
|
||||||
use std::{
|
use std::{
|
||||||
io::{stdin, stdout, Write},
|
io::{stdin, stdout, Write},
|
||||||
process::{Command, Stdio},
|
process::{Command, Stdio},
|
||||||
path::PathBuf,
|
|
||||||
collections::HashSet,
|
|
||||||
error::Error,
|
error::Error,
|
||||||
env,
|
env,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use kanji_search::search_by_radical;
|
||||||
|
|
||||||
use argparse::{ArgumentParser, List, Print, Store, StoreTrue};
|
use argparse::{ArgumentParser, List, Print, Store, StoreTrue};
|
||||||
use colored::*;
|
use colored::*;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use atty::Stream;
|
use atty::Stream;
|
||||||
use kradical_parsing::radk;
|
|
||||||
|
|
||||||
macro_rules! JISHO_URL {
|
macro_rules! JISHO_URL {
|
||||||
() => {
|
() => {
|
||||||
|
@ -61,61 +61,8 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||||
let mut lines_output = 0;
|
let mut lines_output = 0;
|
||||||
let mut output = String::with_capacity(5242880); /* Give output 5MB of buffer; Should be enough to avoid reallocs*/
|
let mut output = String::with_capacity(5242880); /* Give output 5MB of buffer; Should be enough to avoid reallocs*/
|
||||||
|
|
||||||
/* for kanji radical search */
|
|
||||||
let mut result: HashSet<_> = HashSet::new();
|
|
||||||
let mut aux: HashSet<_> = HashSet::new();
|
|
||||||
|
|
||||||
if query.starts_with(':') || query.starts_with(':') {
|
if query.starts_with(':') || query.starts_with(':') {
|
||||||
|
search_by_radical(&mut query);
|
||||||
let path = get_radkfile_path();
|
|
||||||
|
|
||||||
match radk::parse_file(path.unwrap()) { /* if it doesn't exist, just panic */
|
|
||||||
Ok(radk_list) => {
|
|
||||||
result.clear();
|
|
||||||
|
|
||||||
/* First iteration: get the baseline for the results */
|
|
||||||
let mut rad = query.chars().nth(1).unwrap();
|
|
||||||
if rad == '*' || rad == '*' {
|
|
||||||
/* if search_by_radical returned an error then something is very wrong */
|
|
||||||
rad = search_by_strokes(&mut query, &radk_list, 1).expect("Couldn't parse input");
|
|
||||||
}
|
|
||||||
|
|
||||||
for k in radk_list.iter() {
|
|
||||||
if k.radical.glyph.contains(rad) {
|
|
||||||
for input in &k.kanji {
|
|
||||||
result.insert(input);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Iterate until you've exhausted user input: refine the baseline to get final output */
|
|
||||||
for (i, mut rad) in query.clone().chars().skip(2).enumerate() {
|
|
||||||
if rad == '*' || rad == '*' {
|
|
||||||
/* if search_by_radical returned an error then something is very wrong */
|
|
||||||
rad = search_by_strokes(&mut query, &radk_list, i+2).expect("Couldn't parse input");
|
|
||||||
}
|
|
||||||
|
|
||||||
for k in radk_list.iter() {
|
|
||||||
if k.radical.glyph.contains(rad) {
|
|
||||||
for input in &k.kanji {
|
|
||||||
aux.insert(input);
|
|
||||||
}
|
|
||||||
result = &result & &aux;
|
|
||||||
aux.clear();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for r in result {
|
|
||||||
print!("{r} ");
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
Err(_e) => eprintln!("Error while reading radkfile\nIf you don't have the radkfile, download it from \
|
|
||||||
https://www.edrdg.org/krad/kradinf.html and place it in \"~/.local/share/\" on Linux or \"~\\AppData\\Local\\\" on Windows. \
|
|
||||||
This file is needed to search radicals by strokes."),
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Do API request
|
// Do API request
|
||||||
let body: Value = ureq::get(&format!(JISHO_URL!(), query))
|
let body: Value = ureq::get(&format!(JISHO_URL!(), query))
|
||||||
|
@ -406,65 +353,6 @@ fn parse_args() -> Options {
|
||||||
options
|
options
|
||||||
}
|
}
|
||||||
|
|
||||||
fn search_by_strokes(query: &mut String, radk_list: &[radk::Membership], n: usize) -> Result<char, std::io::Error> {
|
|
||||||
|
|
||||||
let mut strokes = String::new();
|
|
||||||
let mut radicals: Vec<char> = Vec::new();
|
|
||||||
let rad;
|
|
||||||
loop{
|
|
||||||
print!("How many strokes does your radical have? ");
|
|
||||||
stdout().flush()?;
|
|
||||||
strokes.clear();
|
|
||||||
if (stdin().read_line(&mut strokes).expect("Can't read from stdin")) == 0 {
|
|
||||||
std::process::exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
match strokes.trim().parse::<u8>() {
|
|
||||||
Ok(strk) => {
|
|
||||||
let mut i = 1;
|
|
||||||
for k in radk_list.iter() {
|
|
||||||
if k.radical.strokes == strk {
|
|
||||||
print!("{}{} ", i, k.radical.glyph);
|
|
||||||
radicals.push(k.radical.glyph.chars().next().unwrap());
|
|
||||||
i += 1;
|
|
||||||
} else if k.radical.strokes > strk {
|
|
||||||
println!();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
loop {
|
|
||||||
print!("Choose the radical to use for your search: ");
|
|
||||||
stdout().flush()?;
|
|
||||||
strokes.clear();
|
|
||||||
if (stdin().read_line(&mut strokes).expect("Can't read from stdin")) == 0 {
|
|
||||||
std::process::exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
match strokes.trim().parse::<usize>() {
|
|
||||||
Ok(strk) => {
|
|
||||||
if strk < 1 || strk > i-1 {
|
|
||||||
eprintln!("Couldn't parse input: number not in range");
|
|
||||||
} else {
|
|
||||||
rad = radicals.get(strk-1).unwrap();
|
|
||||||
/* UTF-8 is not fun */
|
|
||||||
let char_and_index = query.char_indices().nth(n).unwrap();
|
|
||||||
query.replace_range(char_and_index.0..
|
|
||||||
char_and_index.0 +
|
|
||||||
char_and_index.1.len_utf8(),
|
|
||||||
rad.to_string().as_str());
|
|
||||||
println!("{}", query.as_str().bright_black());
|
|
||||||
return Ok(*rad);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => { eprintln!("{e}"); }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => { eprintln!("{e}") }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pipe_to_less(output: String) {
|
fn pipe_to_less(output: String) {
|
||||||
|
|
||||||
let command = Command::new("less")
|
let command = Command::new("less")
|
||||||
|
@ -539,43 +427,3 @@ fn terminal_size() -> Result<usize, i16> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
|
||||||
fn get_radkfile_path() -> Option<PathBuf> {
|
|
||||||
#[allow(deprecated)] /* obviously no windows problem here */
|
|
||||||
std::env::home_dir()
|
|
||||||
.map(|path| path.join(".local/share/radkfile"))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(windows)]
|
|
||||||
/* Nicked this section straight from https://github.com/rust-lang/cargo/blob/master/crates/home/src/windows.rs */
|
|
||||||
extern "C" {
|
|
||||||
fn wcslen(buf: *const u16) -> usize;
|
|
||||||
}
|
|
||||||
#[cfg(windows)]
|
|
||||||
fn get_radkfile_path() -> Option<PathBuf> {
|
|
||||||
use std::ffi::OsString;
|
|
||||||
use std::os::windows::ffi::OsStringExt;
|
|
||||||
use windows_sys::Win32::Foundation::{MAX_PATH, S_OK};
|
|
||||||
use windows_sys::Win32::UI::Shell::{SHGetFolderPathW, CSIDL_PROFILE};
|
|
||||||
|
|
||||||
match env::var_os("USERPROFILE").filter(|s| !s.is_empty()).map(PathBuf::from) {
|
|
||||||
Some(path) => {
|
|
||||||
Some(path.join("Appdata\\Local\\radkfile"))
|
|
||||||
},
|
|
||||||
None => {
|
|
||||||
unsafe {
|
|
||||||
let mut path: Vec<u16> = Vec::with_capacity(MAX_PATH as usize);
|
|
||||||
match SHGetFolderPathW(0, CSIDL_PROFILE as i32, 0, 0, path.as_mut_ptr()) {
|
|
||||||
S_OK => {
|
|
||||||
let len = wcslen(path.as_ptr());
|
|
||||||
path.set_len(len);
|
|
||||||
let s = OsString::from_wide(&path);
|
|
||||||
Some(PathBuf::from(s).join("Appdata\\Local\\radkfile"))
|
|
||||||
}
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue