Skip to content

Commit

Permalink
Merge pull request #47 from sreedevk/development
Browse files Browse the repository at this point in the history
Version 0.1.5
  • Loading branch information
sreedevk authored Jan 26, 2023
2 parents 163e8d8 + 6b06798 commit ab44d1e
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "deduplicator"
version = "0.1.4"
version = "0.1.5"
edition = "2021"
description = "find,filter,delete Duplicates"
license = "MIT"
Expand Down
39 changes: 31 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,38 @@
## Usage

```bash
Usage: deduplicator [OPTIONS]
Usage: deduplicator [OPTIONS] [scan_dir_path]

Arguments:
[scan_dir_path] Run Deduplicator on dir different from pwd (e.g., ~/Pictures )

Options:
-t, --types <TYPES> Filetypes to deduplicate (default = all)
--dir <DIR> Run Deduplicator on dir different from pwd
-i, --interactive Delete files interactively
-m, --minsize <MINSIZE> Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
-h, --help Print help information
-V, --version Print version information
-t, --types <TYPES> Filetypes to deduplicate [default = all]
-i, --interactive Delete files interactively
-s, --min-size <MIN_SIZE> Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T) [default: 1b]
-d, --max-depth <MAX_DEPTH> Max Depth to scan while looking for duplicates
--min-depth <MIN_DEPTH> Min Depth to scan while looking for duplicates
-f, --follow-links Follow links while scanning directories
-h, --help Print help information
-V, --version Print version information
```
### Examples
```bash
# Scan for duplicates recursively from the current dir, only look for png, jpg & pdf file types & interactively delete files
deduplicator -t pdf,jpg,png -i

# Scan for duplicates recursively from the ~/Pictures dir, only look for png, jpeg, jpg & pdf file types & interactively delete files
deduplicator ~/Pictures/ -t png,jpeg,jpg,pdf -i

# Scan for duplicates in the ~/Pictures dir without recursing into subdirectories
deduplicator ~/Pictures --max-depth 0

# look for duplicates in the ~/.config directory while also recursing into symbolic link paths
deduplicator ~/.config --follow-links

# scan for duplicates that are greater than 100mb in the ~/Media directory
deduplicator ~/Media --min-size 100mb
```
## Installation
Expand Down Expand Up @@ -75,7 +98,7 @@ Deduplicator uses size comparison and fxhash (a non-cryptographic hashing al
|:---|:---|---:|---:|---:|---:|
| `deduplicator --dir ~/Data/tmp` | (~120G) | 27.5 ± 1.0 | 26.0 | 32.1 | 1.70 ± 0.09 |
| `deduplicator --dir ~/Data/books` | (~8.6G) | 21.8 ± 0.7 | 20.5 | 24.4 | 1.35 ± 0.07 |
| `deduplicator --dir ~/Data/books --minsize 10M` | (~8.6G) | 16.1 ± 0.6 | 14.9 | 18.8 | 1.00 |
| `deduplicator --dir ~/Data/books --min-size 10M` | (~8.6G) | 16.1 ± 0.6 | 14.9 | 18.8 | 1.00 |
| `deduplicator --dir ~/Data/ --types pdf,jpg,png,jpeg` | (~290G) | 1857.4 ± 24.5 | 1817.0 | 1895.5 | 115.07 ± 4.64 |
* The last entry is slower because of the number of files deduplicator had to go through (~660895 files). The average size of the files rarely affects the performance of deduplicator.
Expand Down
4 changes: 2 additions & 2 deletions src/filters.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::file_manager::File;
use crate::params::Params;

pub fn is_file_gt_minsize(app_opts: &Params, file: &File) -> bool {
match app_opts.get_minsize() {
pub fn is_file_gt_min_size(app_opts: &Params, file: &File) -> bool {
match app_opts.get_min_size() {
Some(msize) => match file.size {
Some(fsize) => fsize >= msize,
None => true,
Expand Down
58 changes: 47 additions & 11 deletions src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,32 @@ use globwalk::{GlobWalker, GlobWalkerBuilder};
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Params {
/// Filetypes to deduplicate (default = all)
/// Filetypes to deduplicate [default = all]
#[arg(short, long)]
pub types: Option<String>,
/// Run Deduplicator on dir different from pwd
#[arg(long, value_hint = ValueHint::DirPath)]
/// Run Deduplicator on dir different from pwd (e.g., ~/Pictures )
#[arg(value_hint = ValueHint::DirPath, value_name = "scan_dir_path")]
pub dir: Option<PathBuf>,
/// Delete files interactively
#[arg(long, short)]
pub interactive: bool,
/// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
/// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T).
#[arg(long, short = 's', default_value = "1b")]
pub min_size: Option<String>,
/// Max Depth to scan while looking for duplicates
#[arg(long, short = 'd')]
pub max_depth: Option<usize>,
/// Min Depth to scan while looking for duplicates
#[arg(long)]
pub min_depth: Option<usize>,
/// Follow links while scanning directories
#[arg(long, short)]
pub minsize: Option<String>,
pub follow_links: bool,
}

impl Params {
pub fn get_minsize(&self) -> Option<u64> {
match &self.minsize {
pub fn get_min_size(&self) -> Option<u64> {
match &self.min_size {
Some(msize) => match msize.parse::<bytesize::ByteSize>() {
Ok(units) => Some(units.0),
Err(_) => None,
Expand All @@ -39,14 +48,41 @@ impl Params {
Ok(dir)
}

/// Applies the optional `min_depth` setting to a glob walker builder.
///
/// When `self.min_depth` is `Some`, the value is handed to the builder's
/// `min_depth` method; when it is `None`, the builder is passed through
/// unchanged. This helper itself never produces an `Err`; the `Result`
/// return type lets it be chained with `and_then` alongside the other
/// `add_glob_*` helpers.
fn add_glob_min_depth(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
    let configured = if let Some(depth) = self.min_depth {
        builder.min_depth(depth)
    } else {
        builder
    };
    Ok(configured)
}

/// Applies the optional `max_depth` setting to a glob walker builder.
///
/// A `Some` value in `self.max_depth` is forwarded to the builder's
/// `max_depth` method; with `None` the builder is returned as received.
/// The result is always `Ok`, so the helper can be composed in an
/// `and_then` chain.
fn add_glob_max_depth(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
    Ok(match self.max_depth {
        None => builder,
        Some(limit) => builder.max_depth(limit),
    })
}

/// Configures whether the glob walker follows links.
///
/// The `follow_links` flag from the parsed parameters is forwarded to the
/// builder as-is, so both the enabled and disabled states are set
/// explicitly on the builder. The result is always `Ok`, which lets the
/// helper be composed in an `and_then` chain.
fn add_glob_follow_links(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
    // Pass the flag straight through rather than matching on a bool only
    // to repeat the same call with a literal `true` / `false`.
    Ok(builder.follow_links(self.follow_links))
}

pub fn get_glob_walker(&self) -> Result<GlobWalker> {
let pattern: String = match self.types.as_ref() {
Some(filetypes) => format!("**/*{{{filetypes}}}"),
None => "**/*".to_string(),
};
// TODO: add params for maximum depth and following symlinks, then pass them to this builder
GlobWalkerBuilder::from_patterns(self.get_directory()?, &[pattern])
.build()
.map_err(|e| anyhow!(e))

let glob_walker_builder = self
.add_glob_min_depth(GlobWalkerBuilder::from_patterns(
self.get_directory()?,
&[pattern],
))
.and_then(|builder| self.add_glob_max_depth(builder))
.and_then(|builder| self.add_glob_follow_links(builder))?;

glob_walker_builder.build().map_err(|e| anyhow!(e))
}
}
2 changes: 1 addition & 1 deletion src/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ fn scan(app_opts: &Params) -> Result<Vec<File>> {
hash: None,
size: Some(fs::metadata(fpath).unwrap().len()),
})
.filter(|file| filters::is_file_gt_minsize(app_opts, file))
.filter(|file| filters::is_file_gt_min_size(app_opts, file))
.collect();

Ok(files)
Expand Down

0 comments on commit ab44d1e

Please sign in to comment.